/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.server;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Ordering;
import com.google.inject.Inject;
import org.apache.druid.common.guava.SettableSupplier;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.java.util.emitter.EmittingLogger;
import org.apache.druid.query.TableDataSource;
import org.apache.druid.query.planning.DataSourceAnalysis;
import org.apache.druid.segment.PhysicalSegmentInspector;
import org.apache.druid.segment.ReferenceCountingSegment;
import org.apache.druid.segment.SegmentLazyLoadFailCallback;
import org.apache.druid.segment.join.table.IndexedTable;
import org.apache.druid.segment.join.table.ReferenceCountingIndexedTable;
import org.apache.druid.segment.loading.SegmentCacheManager;
import org.apache.druid.segment.loading.SegmentLoadingException;
import org.apache.druid.server.metrics.SegmentRowCountDistribution;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.SegmentId;
import org.apache.druid.timeline.VersionedIntervalTimeline;
import org.apache.druid.timeline.partition.PartitionChunk;
import org.apache.druid.timeline.partition.ShardSpec;
import org.apache.druid.utils.CollectionUtils;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Consumer;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

/**
 * This class is responsible for managing data sources and their states, such as the timeline, total segment size, and
 * number of segments. All public methods of this class must be thread-safe.
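 * <p>
 * Illustrative sketch (not part of the original source): on a data server, bootstrap code would typically drive this
 * manager roughly as follows, using a hypothetical {@code segmentManager} instance and with exception handling
 * elided:
 * <pre>{@code
 * if (segmentManager.canHandleSegments()) {
 *   for (DataSegment segment : segmentManager.getCachedSegments()) {
 *     segmentManager.loadSegmentOnBootstrap(segment, () -> {});  // no-op lazy-load failure callback
 *   }
 *   segmentManager.shutdownBootstrap();
 * }
 * }</pre>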
 */
public class SegmentManager
{
  private static final EmittingLogger log = new EmittingLogger(SegmentManager.class);

  private final SegmentCacheManager cacheManager;

  private final ConcurrentHashMap<String, DataSourceState> dataSources = new ConcurrentHashMap<>();

  /**
   * Represents the state of a data source, including the timeline, total segment size, and number of segments.
   */
  public static class DataSourceState
  {
    private final VersionedIntervalTimeline<String, ReferenceCountingSegment> timeline =
        new VersionedIntervalTimeline<>(Ordering.natural());

    private final ConcurrentHashMap<SegmentId, ReferenceCountingIndexedTable> tablesLookup = new ConcurrentHashMap<>();
    private long totalSegmentSize;
    private long numSegments;
    private long rowCount;
    private final SegmentRowCountDistribution segmentRowCountDistribution = new SegmentRowCountDistribution();

    private void addSegment(DataSegment segment, long numOfRows)
    {
      totalSegmentSize += segment.getSize();
      numSegments++;
      rowCount += numOfRows;
      if (segment.isTombstone()) {
        segmentRowCountDistribution.addTombstoneToDistribution();
      } else {
        segmentRowCountDistribution.addRowCountToDistribution(numOfRows);
      }
    }

    private void removeSegment(DataSegment segment, long numOfRows)
    {
      totalSegmentSize -= segment.getSize();
      numSegments--;
      rowCount -= numOfRows;
      if (segment.isTombstone()) {
        segmentRowCountDistribution.removeTombstoneFromDistribution();
      } else {
        segmentRowCountDistribution.removeRowCountFromDistribution(numOfRows);
      }
    }

    public VersionedIntervalTimeline<String, ReferenceCountingSegment> getTimeline()
    {
      return timeline;
    }

    public ConcurrentHashMap<SegmentId, ReferenceCountingIndexedTable> getTablesLookup()
    {
      return tablesLookup;
    }

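    /**
     * Returns the average number of rows per segment for this data source, or zero if it has no segments.
     */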
    public long getAverageRowCount()
    {
      return numSegments == 0 ? 0 : rowCount / numSegments;
    }

    public long getTotalSegmentSize()
    {
      return totalSegmentSize;
    }

    public long getNumSegments()
    {
      return numSegments;
    }

    public boolean isEmpty()
    {
      return numSegments == 0;
    }

    private SegmentRowCountDistribution getSegmentRowCountDistribution()
    {
      return segmentRowCountDistribution;
    }
  }

  @Inject
  public SegmentManager(SegmentCacheManager cacheManager)
  {
    this.cacheManager = cacheManager;
  }

  @VisibleForTesting
  Map<String, DataSourceState> getDataSources()
  {
    return dataSources;
  }

  /**
   * Returns a map of dataSource to the total byte size of segments managed by this segmentManager. This method should
   * be used carefully because the returned map is a point-in-time snapshot and may differ from the actual data source
   * states.
   *
   * @return a map of dataSources and their total byte sizes
   */
  public Map<String, Long> getDataSourceSizes()
  {
    return CollectionUtils.mapValues(dataSources, SegmentManager.DataSourceState::getTotalSegmentSize);
  }

  public Map<String, Long> getAverageRowCountForDatasource()
  {
    return CollectionUtils.mapValues(dataSources, SegmentManager.DataSourceState::getAverageRowCount);
  }

  public Map<String, SegmentRowCountDistribution> getRowCountDistribution()
  {
    return CollectionUtils.mapValues(dataSources, SegmentManager.DataSourceState::getSegmentRowCountDistribution);
  }

  public Set<String> getDataSourceNames()
  {
    return dataSources.keySet();
  }

  /**
   * Returns a map of dataSource to the number of segments managed by this segmentManager. This method should be used
   * carefully because the returned map is a point-in-time snapshot and may differ from the actual data source states.
   *
   * @return a map of dataSources and number of segments
   */
  public Map<String, Long> getDataSourceCounts()
  {
    return CollectionUtils.mapValues(dataSources, SegmentManager.DataSourceState::getNumSegments);
  }

  /**
   * Returns the timeline for a datasource, if it exists. The analysis object passed in must represent a scan-based
   * datasource of a single table.
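   *
   * <p>
   * Illustrative sketch (not part of the original source), assuming a hypothetical {@code segmentManager} instance
   * and a {@code DataSourceAnalysis} for the table:
   * <pre>{@code
   * int chunks = segmentManager.getTimeline(analysis)
   *                            .map(timeline -> timeline.lookup(Intervals.of("2024-01-01/P1D")).size())
   *                            .orElse(0);
   * }</pre>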
   *
   * @param analysis data source analysis information
   *
   * @return timeline, if it exists
   *
   * @throws IllegalStateException if 'analysis' does not represent a scan-based datasource of a single table
   */
  public Optional<VersionedIntervalTimeline<String, ReferenceCountingSegment>> getTimeline(DataSourceAnalysis analysis)
  {
    final TableDataSource tableDataSource = getTableDataSource(analysis);
    return Optional.ofNullable(dataSources.get(tableDataSource.getName())).map(DataSourceState::getTimeline);
  }

  /**
   * Returns the collection of {@link IndexedTable} for all joinable segments across the entire timeline (join
   * conditions do not currently consider the query's intervals), if the timeline exists.
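   *
   * <p>
   * Illustrative sketch (not part of the original source), assuming a hypothetical {@code segmentManager} instance:
   * <pre>{@code
   * long joinableSegments = segmentManager.getIndexedTables(analysis)
   *                                       .map(Stream::count)
   *                                       .orElse(0L);
   * }</pre>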
   */
  public Optional<Stream<ReferenceCountingIndexedTable>> getIndexedTables(DataSourceAnalysis analysis)
  {
    return getTimeline(analysis).map(timeline -> {
      // join doesn't currently consider intervals, so just consider all segments
      final Stream<ReferenceCountingSegment> segments =
          timeline.lookup(Intervals.ETERNITY)
                  .stream()
                  .flatMap(x -> StreamSupport.stream(x.getObject().payloads().spliterator(), false));
      final TableDataSource tableDataSource = getTableDataSource(analysis);
      ConcurrentHashMap<SegmentId, ReferenceCountingIndexedTable> tables =
          Optional.ofNullable(dataSources.get(tableDataSource.getName())).map(DataSourceState::getTablesLookup)
                  .orElseThrow(() -> new ISE("Datasource %s does not have IndexedTables", tableDataSource.getName()));
      return segments.map(segment -> tables.get(segment.getId())).filter(Objects::nonNull);
    });
  }

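  /**
   * Returns whether the given datasource currently has any segments registered with an {@link IndexedTable},
   * i.e. whether it can be used as a joinable datasource.
   */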
  public boolean hasIndexedTables(String dataSourceName)
  {
    if (dataSources.containsKey(dataSourceName)) {
      return dataSources.get(dataSourceName).tablesLookup.size() > 0;
    }
    return false;
  }

  private TableDataSource getTableDataSource(DataSourceAnalysis analysis)
  {
    return analysis.getBaseTableDataSource()
                   .orElseThrow(() -> new ISE("Cannot handle datasource: %s", analysis.getBaseDataSource()));
  }

  /**
   * Load the supplied segment into the page cache during bootstrap. If the segment is already loaded, this method
   * does not reload it into the page cache.
   *
   * @param dataSegment segment to bootstrap
   * @param loadFailed callback to execute when segment lazy load fails. This applies only
   *                   when lazy loading is enabled.
   *
   * @throws SegmentLoadingException if the segment cannot be loaded
   * @throws IOException if the segment info cannot be cached on disk
   */
  public void loadSegmentOnBootstrap(
      final DataSegment dataSegment,
      final SegmentLazyLoadFailCallback loadFailed
  ) throws SegmentLoadingException, IOException
  {
    final ReferenceCountingSegment segment;
    try {
      segment = cacheManager.getBootstrapSegment(dataSegment, loadFailed);
      if (segment == null) {
        throw new SegmentLoadingException(
            "No segment adapter found for bootstrap segment[%s] with loadSpec[%s].",
            dataSegment.getId(), dataSegment.getLoadSpec()
        );
      }
    }
    catch (SegmentLoadingException e) {
      cacheManager.cleanup(dataSegment);
      throw e;
    }
    loadSegment(dataSegment, segment, cacheManager::loadSegmentIntoPageCacheOnBootstrap);
  }

  /**
   * Load the supplied segment into the page cache. If the segment is already loaded, this method does not reload the
   * segment into the page cache. This method should be called for non-bootstrapping flows. Unlike
   * {@link #loadSegmentOnBootstrap(DataSegment, SegmentLazyLoadFailCallback)}, this method doesn't accept a lazy load
   * fail callback because the segment is loaded immediately.
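   *
   * <p>
   * Illustrative sketch (not part of the original source), with exception handling elided:
   * <pre>{@code
   * segmentManager.loadSegment(dataSegment);  // e.g. when the segment is assigned to this server
   * // ... serve queries ...
   * segmentManager.dropSegment(dataSegment);  // e.g. when the segment is later unassigned
   * }</pre>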
   *
   * @param dataSegment segment to load
   *
   * @throws SegmentLoadingException if the segment cannot be loaded
   * @throws IOException if the segment info cannot be cached on disk
   */
  public void loadSegment(final DataSegment dataSegment) throws SegmentLoadingException, IOException
  {
    final ReferenceCountingSegment segment;
    try {
      segment = cacheManager.getSegment(dataSegment);
      if (segment == null) {
        throw new SegmentLoadingException(
            "No segment adapter found for segment[%s] with loadSpec[%s].",
            dataSegment.getId(), dataSegment.getLoadSpec()
        );
      }
    }
    catch (SegmentLoadingException e) {
      cacheManager.cleanup(dataSegment);
      throw e;
    }
    loadSegment(dataSegment, segment, cacheManager::loadSegmentIntoPageCache);
  }

  private void loadSegment(
      final DataSegment dataSegment,
      final ReferenceCountingSegment segment,
      final Consumer<DataSegment> pageCacheLoadFunction
  ) throws IOException
  {
    final SettableSupplier<Boolean> resultSupplier = new SettableSupplier<>();

    // compute() is used to ensure that the operation for a data source is executed atomically
    dataSources.compute(
        dataSegment.getDataSource(),
        (k, v) -> {
          final DataSourceState dataSourceState = v == null ? new DataSourceState() : v;
          final VersionedIntervalTimeline<String, ReferenceCountingSegment> loadedIntervals =
              dataSourceState.getTimeline();
          final PartitionChunk<ReferenceCountingSegment> entry = loadedIntervals.findChunk(
              dataSegment.getInterval(),
              dataSegment.getVersion(),
              dataSegment.getShardSpec().getPartitionNum()
          );

          if (entry != null) {
            log.warn("Told to load an adapter for segment[%s] that already exists", dataSegment.getId());
            resultSupplier.set(false);
          } else {
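            // Joinability is all-or-nothing for a datasource: either every segment exposes an IndexedTable or none
            // does. Mixed states are only logged as errors here; the segment is still added to the timeline below.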
            final IndexedTable table = segment.as(IndexedTable.class);
            if (table != null) {
              if (dataSourceState.isEmpty() || dataSourceState.numSegments == dataSourceState.tablesLookup.size()) {
                dataSourceState.tablesLookup.put(segment.getId(), new ReferenceCountingIndexedTable(table));
              } else {
                log.error("Cannot load segment[%s] with IndexedTable, no existing segments are joinable", segment.getId());
              }
            } else if (dataSourceState.tablesLookup.size() > 0) {
              log.error("Cannot load segment[%s] without IndexedTable, all existing segments are joinable", segment.getId());
            }
            loadedIntervals.add(
                dataSegment.getInterval(),
                dataSegment.getVersion(),
                dataSegment.getShardSpec().createChunk(segment)
            );
            final PhysicalSegmentInspector countInspector = segment.as(PhysicalSegmentInspector.class);
            final long numOfRows;
            if (dataSegment.isTombstone() || countInspector == null) {
              numOfRows = 0;
            } else {
              numOfRows = countInspector.getNumRows();
            }
            dataSourceState.addSegment(dataSegment, numOfRows);

            pageCacheLoadFunction.accept(dataSegment);
            resultSupplier.set(true);
          }

          return dataSourceState;
        }
    );
    final boolean loadResult = resultSupplier.get();
    if (loadResult) {
      cacheManager.storeInfoFile(dataSegment);
    }
  }

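  /**
   * Drop the supplied segment: remove it from the datasource's timeline and tables lookup, close its underlying
   * resources, and remove its info file and cached files via the cache manager. If this was the datasource's last
   * segment, the datasource entry itself is removed.
   */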
  public void dropSegment(final DataSegment segment)
  {
    final String dataSource = segment.getDataSource();

    // compute() is used to ensure that the operation for a data source is executed atomically
    dataSources.compute(
        dataSource,
        (dataSourceName, dataSourceState) -> {
          if (dataSourceState == null) {
            log.info("Told to delete a queryable for a dataSource[%s] that doesn't exist.", dataSourceName);
            return null;
          } else {
            final VersionedIntervalTimeline<String, ReferenceCountingSegment> loadedIntervals =
                dataSourceState.getTimeline();

            final ShardSpec shardSpec = segment.getShardSpec();
            final PartitionChunk<ReferenceCountingSegment> removed = loadedIntervals.remove(
                segment.getInterval(),
                segment.getVersion(),
                // remove() internally searches for a partitionChunk to remove which is *equal* to the given
                // partitionChunk. Note that partitionChunk.equals() checks only the partitionNum, but not the object.
                segment.getShardSpec().createChunk(ReferenceCountingSegment.wrapSegment(null, shardSpec))
            );
            final ReferenceCountingSegment oldQueryable = (removed == null) ? null : removed.getObject();

            if (oldQueryable != null) {
              try (final Closer closer = Closer.create()) {
                final PhysicalSegmentInspector countInspector = oldQueryable.as(PhysicalSegmentInspector.class);
                final long numOfRows;
                if (segment.isTombstone() || countInspector == null) {
                  numOfRows = 0;
                } else {
                  numOfRows = countInspector.getNumRows();
                }
                dataSourceState.removeSegment(segment, numOfRows);

                closer.register(oldQueryable);
                log.info("Attempting to close segment[%s]", segment.getId());
                final ReferenceCountingIndexedTable oldTable = dataSourceState.tablesLookup.remove(segment.getId());
                if (oldTable != null) {
                  closer.register(oldTable);
                }
              }
              catch (IOException e) {
                throw new RuntimeException(e);
              }
            } else {
              log.info(
                  "Told to delete a queryable on dataSource[%s] for interval[%s] and version[%s] that I don't have.",
                  dataSourceName,
                  segment.getInterval(),
                  segment.getVersion()
              );
            }

            // Returning null removes the entry of dataSource from the map
            return dataSourceState.isEmpty() ? null : dataSourceState;
          }
        }
    );

    cacheManager.removeInfoFile(segment);
    cacheManager.cleanup(segment);
  }

  /**
   * Return whether the cache manager can handle segments or not.
   */
  public boolean canHandleSegments()
  {
    return cacheManager.canHandleSegments();
  }

  /**
   * Return a list of cached segments, if any. This should be called only when
   * {@link #canHandleSegments()} is true.
   */
  public List<DataSegment> getCachedSegments() throws IOException
  {
    return cacheManager.getCachedSegments();
  }

  /**
   * Shutdown the bootstrap executor to save resources.
   * This should be called after loading bootstrap segments into the page cache.
   */
  public void shutdownBootstrap()
  {
    cacheManager.shutdownBootstrap();
  }
}