All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter Maven / Gradle / Ivy

There is a newer version: 30.0.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.segment.incremental;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;
import org.apache.druid.query.BaseQuery;
import org.apache.druid.query.QueryMetrics;
import org.apache.druid.query.filter.Filter;
import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.Cursor;
import org.apache.druid.segment.DimensionDictionarySelector;
import org.apache.druid.segment.DimensionIndexer;
import org.apache.druid.segment.Metadata;
import org.apache.druid.segment.NestedDataColumnIndexerV4;
import org.apache.druid.segment.StorageAdapter;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.data.Indexed;
import org.apache.druid.segment.data.ListIndexed;
import org.apache.druid.segment.filter.ValueMatchers;
import org.joda.time.DateTime;
import org.joda.time.Interval;

import javax.annotation.Nullable;
import java.util.Iterator;

/**
 */
public class IncrementalIndexStorageAdapter implements StorageAdapter
{
  private static final ColumnCapabilities.CoercionLogic STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC =
      new ColumnCapabilities.CoercionLogic()
      {
        @Override
        public boolean dictionaryEncoded()
        {
          return false;
        }

        @Override
        public boolean dictionaryValuesSorted()
        {
          return false;
        }

        @Override
        public boolean dictionaryValuesUnique()
        {
          return true;
        }

        @Override
        public boolean multipleValues()
        {
          return true;
        }

        @Override
        public boolean hasNulls()
        {
          return true;
        }
      };

  private static final ColumnCapabilities.CoercionLogic SNAPSHOT_STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC =
      new ColumnCapabilities.CoercionLogic()
      {
        @Override
        public boolean dictionaryEncoded()
        {
          return true;
        }

        @Override
        public boolean dictionaryValuesSorted()
        {
          return true;
        }

        @Override
        public boolean dictionaryValuesUnique()
        {
          return true;
        }

        @Override
        public boolean multipleValues()
        {
          return false;
        }

        @Override
        public boolean hasNulls()
        {
          return false;
        }
      };

  final IncrementalIndex index;

  public IncrementalIndexStorageAdapter(IncrementalIndex index)
  {
    this.index = index;
  }

  @Override
  public Interval getInterval()
  {
    return index.getInterval();
  }

  @Override
  public Indexed getAvailableDimensions()
  {
    return new ListIndexed<>(index.getDimensionNames());
  }

  @Override
  public Iterable getAvailableMetrics()
  {
    return index.getMetricNames();
  }

  @Override
  public int getDimensionCardinality(String dimension)
  {
    if (dimension.equals(ColumnHolder.TIME_COLUMN_NAME)) {
      return DimensionDictionarySelector.CARDINALITY_UNKNOWN;
    }

    IncrementalIndex.DimensionDesc desc = index.getDimension(dimension);
    if (desc == null) {
      return 0;
    }

    return desc.getIndexer().getCardinality();
  }

  @Override
  public int getNumRows()
  {
    return index.size();
  }

  @Override
  public DateTime getMinTime()
  {
    return index.getMinTime();
  }

  @Override
  public DateTime getMaxTime()
  {
    return index.getMaxTime();
  }

  @Nullable
  @Override
  public Comparable getMinValue(String column)
  {
    IncrementalIndex.DimensionDesc desc = index.getDimension(column);
    if (desc == null) {
      return null;
    }

    DimensionIndexer indexer = desc.getIndexer();
    return indexer.getMinValue();
  }

  @Nullable
  @Override
  public Comparable getMaxValue(String column)
  {
    IncrementalIndex.DimensionDesc desc = index.getDimension(column);
    if (desc == null) {
      return null;
    }

    DimensionIndexer indexer = desc.getIndexer();
    return indexer.getMaxValue();
  }

  @Override
  public ColumnCapabilities getColumnCapabilities(String column)
  {
    IncrementalIndex.DimensionDesc desc = index.getDimension(column);
    // nested column indexer is a liar, and behaves like any type if it only processes unnested literals of a single
    // type, so force it to use nested column type
    if (desc != null && desc.getIndexer() instanceof NestedDataColumnIndexerV4) {
      return ColumnCapabilitiesImpl.createDefault().setType(ColumnType.NESTED_DATA);
    }

    // Different from index.getColumnCapabilities because, in a way, IncrementalIndex's string-typed dimensions
    // are always potentially multi-valued at query time. (Missing / null values for a row can potentially be
    // represented by an empty array; see StringDimensionIndexer.IndexerDimensionSelector's getRow method.)
    //
    // We don't want to represent this as having-multiple-values in index.getCapabilities, because that's used
    // at index-persisting time to determine if we need a multi-value column or not. However, that means we
    // need to tweak the capabilities here in the StorageAdapter (a query-time construct), so at query time
    // they appear multi-valued.
    //
    // Note that this could be improved if we snapshot the capabilities at cursor creation time and feed those through
    // to the StringDimensionIndexer so the selector built on top of it can produce values from the snapshot state of
    // multi-valuedness at cursor creation time, instead of the latest state, and getSnapshotColumnCapabilities could
    // be removed.
    return ColumnCapabilitiesImpl.snapshot(
        index.getColumnCapabilities(column),
        STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC
    );
  }

  /**
   * Sad workaround for {@link org.apache.druid.query.metadata.SegmentAnalyzer} to deal with the fact that the
   * response from {@link #getColumnCapabilities} is not accurate for string columns, in that it reports all string
   * columns as having multiple values. This method returns the actual capabilities of the underlying
   * {@link IncrementalIndex} at the time this method is called.
   */
  public ColumnCapabilities getSnapshotColumnCapabilities(String column)
  {
    return ColumnCapabilitiesImpl.snapshot(
        index.getColumnCapabilities(column),
        SNAPSHOT_STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC
    );
  }

  @Override
  public DateTime getMaxIngestedEventTime()
  {
    return index.getMaxIngestedEventTime();
  }

  @Override
  public Sequence makeCursors(
      @Nullable final Filter filter,
      final Interval interval,
      final VirtualColumns virtualColumns,
      final Granularity gran,
      final boolean descending,
      @Nullable QueryMetrics queryMetrics
  )
  {
    if (index.isEmpty()) {
      return Sequences.empty();
    }

    if (queryMetrics != null) {
      queryMetrics.vectorized(false);
    }

    final Interval dataInterval = new Interval(getMinTime(), gran.bucketEnd(getMaxTime()));

    if (!interval.overlaps(dataInterval)) {
      return Sequences.empty();
    }
    final Interval actualInterval = interval.overlap(dataInterval);
    Iterable intervals = gran.getIterable(actualInterval);
    if (descending) {
      intervals = Lists.reverse(ImmutableList.copyOf(intervals));
    }

    return Sequences
        .simple(intervals)
        .map(i -> new IncrementalIndexCursor(virtualColumns, descending, filter, i, actualInterval, gran));
  }

  @Override
  public Metadata getMetadata()
  {
    return index.getMetadata();
  }

  private class IncrementalIndexCursor implements Cursor
  {
    private IncrementalIndexRowHolder currEntry;
    private final ColumnSelectorFactory columnSelectorFactory;
    private final ValueMatcher filterMatcher;
    private final int maxRowIndex;
    private Iterator baseIter;
    private Iterable cursorIterable;
    private boolean emptyRange;
    private final DateTime time;
    private int numAdvanced;
    private boolean done;

    IncrementalIndexCursor(
        VirtualColumns virtualColumns,
        boolean descending,
        Filter filter,
        Interval interval,
        Interval actualInterval,
        Granularity gran
    )
    {
      currEntry = new IncrementalIndexRowHolder();
      columnSelectorFactory = new IncrementalIndexColumnSelectorFactory(
          IncrementalIndexStorageAdapter.this,
          virtualColumns,
          descending,
          currEntry
      );
      // Set maxRowIndex before creating the filterMatcher. See https://github.com/apache/druid/pull/6340
      maxRowIndex = index.getLastRowIndex();
      filterMatcher = filter == null ? ValueMatchers.allTrue() : filter.makeMatcher(columnSelectorFactory);
      numAdvanced = -1;
      final long timeStart = Math.max(interval.getStartMillis(), actualInterval.getStartMillis());
      cursorIterable = index.getFacts().timeRangeIterable(
          descending,
          timeStart,
          Math.min(actualInterval.getEndMillis(), gran.increment(interval.getStartMillis()))
      );
      emptyRange = !cursorIterable.iterator().hasNext();
      time = gran.toDateTime(interval.getStartMillis());

      reset();
    }

    @Override
    public ColumnSelectorFactory getColumnSelectorFactory()
    {
      return columnSelectorFactory;
    }

    @Override
    public DateTime getTime()
    {
      return time;
    }

    @Override
    public void advance()
    {
      if (!baseIter.hasNext()) {
        done = true;
        return;
      }

      while (baseIter.hasNext()) {
        BaseQuery.checkInterrupted();

        IncrementalIndexRow entry = baseIter.next();
        if (beyondMaxRowIndex(entry.getRowIndex())) {
          continue;
        }

        currEntry.set(entry);

        if (filterMatcher.matches(false)) {
          return;
        }
      }

      done = true;
    }

    @Override
    public void advanceUninterruptibly()
    {
      if (!baseIter.hasNext()) {
        done = true;
        return;
      }

      while (baseIter.hasNext()) {
        if (Thread.currentThread().isInterrupted()) {
          return;
        }

        IncrementalIndexRow entry = baseIter.next();
        if (beyondMaxRowIndex(entry.getRowIndex())) {
          continue;
        }

        currEntry.set(entry);

        if (filterMatcher.matches(false)) {
          return;
        }
      }

      done = true;
    }

    @Override
    public boolean isDone()
    {
      return done;
    }

    @Override
    public boolean isDoneOrInterrupted()
    {
      return isDone() || Thread.currentThread().isInterrupted();
    }

    @Override
    public void reset()
    {
      baseIter = cursorIterable.iterator();

      if (numAdvanced == -1) {
        numAdvanced = 0;
      } else {
        Iterators.advance(baseIter, numAdvanced);
      }

      BaseQuery.checkInterrupted();

      boolean foundMatched = false;
      while (baseIter.hasNext()) {
        IncrementalIndexRow entry = baseIter.next();
        if (beyondMaxRowIndex(entry.getRowIndex())) {
          numAdvanced++;
          continue;
        }
        currEntry.set(entry);
        if (filterMatcher.matches(false)) {
          foundMatched = true;
          break;
        }

        numAdvanced++;
      }

      done = !foundMatched && (emptyRange || !baseIter.hasNext());
    }

    private boolean beyondMaxRowIndex(int rowIndex)
    {
      // ignore rows whose rowIndex is beyond the maxRowIndex
      // rows are order by timestamp, not rowIndex,
      // so we still need to go through all rows to skip rows added after cursor created
      return rowIndex > maxRowIndex;
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy