/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.metadata;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicates;
import com.google.common.base.Stopwatch;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Interner;
import com.google.common.collect.Interners;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import com.google.errorprone.annotations.concurrent.GuardedBy;
import org.apache.druid.client.InternalQueryConfig;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.concurrent.Execs;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Yielder;
import org.apache.druid.java.util.common.guava.Yielders;
import org.apache.druid.java.util.emitter.EmittingLogger;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.query.DruidMetrics;
import org.apache.druid.query.QueryContexts;
import org.apache.druid.query.TableDataSource;
import org.apache.druid.query.metadata.metadata.AllColumnIncluderator;
import org.apache.druid.query.metadata.metadata.ColumnAnalysis;
import org.apache.druid.query.metadata.metadata.SegmentAnalysis;
import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery;
import org.apache.druid.query.spec.MultipleSpecificSegmentSpec;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.column.Types;
import org.apache.druid.server.QueryLifecycleFactory;
import org.apache.druid.server.coordination.DruidServerMetadata;
import org.apache.druid.server.coordination.ServerType;
import org.apache.druid.server.security.Access;
import org.apache.druid.server.security.Escalator;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.SegmentId;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
/**
* An abstract class that listens for segment change events and caches segment metadata. It periodically refreshes
* the segments by fetching their metadata, which includes schema information, from sources such as
* data nodes, tasks, and the metadata database, and builds the table schema.
*
* At startup, the cache awaits the initialization of the timeline.
* If the cache employs a segment metadata query to retrieve segment schema, it attempts to refresh a maximum
* of {@code MAX_SEGMENTS_PER_QUERY} segments for each datasource in each refresh cycle.
* Once all datasources have undergone this process, the initial schema of each datasource is constructed,
* and the cache is marked as initialized.
* Subsequently, the cache continues to periodically refresh segments and update the datasource schema.
* It is also important to note that a failure in segment refresh results in pausing the refresh work,
* and the process is resumed in the next refresh cycle.
*
* This class has an abstract method {@link #refresh(Set, Set)} which the child class must override
* with the logic to build and cache table schema.
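*
* A minimal subclass sketch, for illustration only. The subclass name is hypothetical, the
* {@code DataSourceInformation(dataSource, rowSignature)} constructor is assumed, and real implementations do
* considerably more work:
* <pre>{@code
* class SimpleSegmentMetadataCache extends AbstractSegmentMetadataCache<DataSourceInformation>
* {
*   SimpleSegmentMetadataCache(
*       QueryLifecycleFactory queryLifecycleFactory,
*       SegmentMetadataCacheConfig config,
*       Escalator escalator,
*       InternalQueryConfig internalQueryConfig,
*       ServiceEmitter emitter
*   )
*   {
*     super(queryLifecycleFactory, config, escalator, internalQueryConfig, emitter);
*   }
*
*   public void start()
*   {
*     // run the periodic refresh loop on the cache executor
*     cacheExec.submit(this::cacheExecLoop);
*   }
*
*   public void stop()
*   {
*     cacheExec.shutdownNow();
*     callbackExec.shutdownNow();
*   }
*
*   public void refresh(Set<SegmentId> segmentsToRefresh, Set<String> dataSourcesToRebuild) throws IOException
*   {
*     // refresh segment signatures, then rebuild the schema of every datasource marked for rebuild
*     refreshSegments(segmentsToRefresh);
*     synchronized (lock) {
*       dataSourcesToRebuild.addAll(dataSourcesNeedingRebuild);
*       dataSourcesNeedingRebuild.clear();
*     }
*     for (String dataSource : dataSourcesToRebuild) {
*       RowSignature rowSignature = buildDataSourceRowSignature(dataSource);
*       if (rowSignature != null) {
*         tables.put(dataSource, new DataSourceInformation(dataSource, rowSignature));
*       }
*     }
*   }
*
*   protected void removeSegmentAction(SegmentId segmentId)
*   {
*     // nothing extra to clean up in this sketch
*   }
* }
* }</pre>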
*
* Note on handling tombstone segments:
* These segments lack data or column information.
* Additionally, segment metadata queries, which are not yet implemented for tombstone segments
* (see: https://github.com/apache/druid/pull/12137), do not provide metadata for tombstones,
* leading to indefinite refresh attempts for these segments.
* Therefore, these segments are never added to the set of segments being refreshed.
*
* @param <T> The type of information associated with the data source, which must extend {@link DataSourceInformation}.
*/
public abstract class AbstractSegmentMetadataCache<T extends DataSourceInformation>
{
private static final EmittingLogger log = new EmittingLogger(AbstractSegmentMetadataCache.class);
private static final int MAX_SEGMENTS_PER_QUERY = 15000;
private static final long DEFAULT_NUM_ROWS = 0;
private final QueryLifecycleFactory queryLifecycleFactory;
private final SegmentMetadataCacheConfig config;
// Escalator, so we can attach an authentication result to queries we generate.
private final Escalator escalator;
private final ColumnTypeMergePolicy columnTypeMergePolicy;
// For awaitInitialization.
private final CountDownLatch initialized = new CountDownLatch(1);
// Configured context to attach to internally generated queries.
private final InternalQueryConfig internalQueryConfig;
@GuardedBy("lock")
private boolean refreshImmediately = false;
/**
* Counts the total number of known segments. This variable is used only for the segments table in the system schema
* to initialize a map with a more proper size when it creates a snapshot. As a result, it doesn't have to be exact,
* and thus there is no concurrency control for this variable.
*/
private int totalSegments = 0;
// Newest segments first, so they override older ones.
protected static final Comparator<SegmentId> SEGMENT_ORDER = Comparator
.comparing((SegmentId segmentId) -> segmentId.getInterval().getStart())
.reversed()
.thenComparing(Function.identity());
protected static final Interner<RowSignature> ROW_SIGNATURE_INTERNER = Interners.newWeakInterner();
/**
* DataSource -> Segment -> AvailableSegmentMetadata(contains RowSignature) for that segment.
* Use SortedMap for segments so they are merged in deterministic order, from newest to oldest.
*
* This map is updated by these two threads.
*
* - {@link #callbackExec} can update it in {@link #addSegment}, {@link #removeServerSegment},
* and {@link #removeSegment}.
* - {@link #cacheExec} can update it in {@link #refreshSegmentsForDataSource}.
*
* While it is being updated, this map is read by these two types of thread.
*
* - {@link #cacheExec} can iterate all {@link AvailableSegmentMetadata}s per datasource.
* See {@link #buildDataSourceRowSignature}.
* - Query threads can create a snapshot of the entire map for processing queries on the system table.
* See {@link #getSegmentMetadataSnapshot()}.
*
* As the access pattern of this map is read-intensive, we should minimize the contention between writers and readers.
* Since there are two threads that can update this map at the same time, those writers should lock the inner map
* first and then lock the entry before it updates segment metadata. This can be done using
* {@link ConcurrentMap#compute} as below. Note that, if you need to update the variables guarded by {@link #lock}
* inside of compute(), you should acquire the lock before calling compute() so that the function executed
* inside compute() stays inexpensive.
*
*
* segmentMetadataInfo.compute(
* datasourceParam,
* (datasource, segmentsMap) -> {
* if (segmentsMap == null) return null;
* else {
* segmentsMap.compute(
* segmentIdParam,
* (segmentId, segmentMetadata) -> {
* // update segmentMetadata
* }
* );
* return segmentsMap;
* }
* }
* );
*
*
* Readers can simply delegate the locking to the concurrent map and iterate map entries.
*/
protected final ConcurrentHashMap<String, ConcurrentSkipListMap<SegmentId, AvailableSegmentMetadata>> segmentMetadataInfo
= new ConcurrentHashMap<>();
protected final ExecutorService cacheExec;
protected final ExecutorService callbackExec;
@GuardedBy("lock")
protected boolean isServerViewInitialized = false;
protected final ServiceEmitter emitter;
/**
* Map of datasource and generic object extending DataSourceInformation.
* This structure can be accessed by {@link #cacheExec} and {@link #callbackExec} threads.
*/
protected final ConcurrentHashMap<String, T> tables = new ConcurrentHashMap<>();
/**
* This lock coordinates the access from multiple threads to those variables guarded by this lock.
* Currently, there are 2 threads that can access these variables.
*
* - {@link #callbackExec} executes the timeline callbacks whenever ServerView changes.
* - {@code cacheExec} periodically refreshes segment metadata and {@link DataSourceInformation} if necessary
* based on the information collected via timeline callbacks.
*/
protected final Object lock = new Object();
// All mutable segments.
@GuardedBy("lock")
protected final TreeSet<SegmentId> mutableSegments = new TreeSet<>(SEGMENT_ORDER);
// All datasources that need tables regenerated.
@GuardedBy("lock")
protected final Set<String> dataSourcesNeedingRebuild = new HashSet<>();
// All segments that need to be refreshed.
@GuardedBy("lock")
protected final TreeSet<SegmentId> segmentsNeedingRefresh = new TreeSet<>(SEGMENT_ORDER);
public AbstractSegmentMetadataCache(
final QueryLifecycleFactory queryLifecycleFactory,
final SegmentMetadataCacheConfig config,
final Escalator escalator,
final InternalQueryConfig internalQueryConfig,
final ServiceEmitter emitter
)
{
this.queryLifecycleFactory = Preconditions.checkNotNull(queryLifecycleFactory, "queryLifecycleFactory");
this.config = Preconditions.checkNotNull(config, "config");
this.columnTypeMergePolicy = config.getMetadataColumnTypeMergePolicy();
this.cacheExec = Execs.singleThreaded("DruidSchema-Cache-%d");
this.callbackExec = Execs.singleThreaded("DruidSchema-Callback-%d");
this.escalator = escalator;
this.internalQueryConfig = internalQueryConfig;
this.emitter = emitter;
}
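/**
* Main loop that runs on {@link #cacheExec}. It waits for the configured refresh period (or an immediate-refresh
* signal), collects the pending segments and datasources, and invokes {@link #refresh(Set, Set)}. The cache is
* marked as initialized after the first successful pass; on failure the pending work is re-queued and retried in
* the next cycle.
*/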
protected void cacheExecLoop()
{
final Stopwatch stopwatch = Stopwatch.createStarted();
long lastRefresh = 0L;
long lastFailure = 0L;
try {
refreshWaitCondition();
while (!Thread.currentThread().isInterrupted()) {
final Set<SegmentId> segmentsToRefresh = new TreeSet<>();
final Set<String> dataSourcesToRebuild = new TreeSet<>();
try {
synchronized (lock) {
final long nextRefreshNoFuzz = DateTimes
.utc(lastRefresh)
.plus(config.getMetadataRefreshPeriod())
.getMillis();
// Fuzz a bit to spread load out when we have multiple brokers.
final long nextRefresh = nextRefreshNoFuzz + (long) ((nextRefreshNoFuzz - lastRefresh) * 0.10);
while (true) {
// Do not refresh if it's too soon after a failure (to avoid rapid cycles of failure).
final boolean wasRecentFailure = DateTimes.utc(lastFailure)
.plus(config.getMetadataRefreshPeriod())
.isAfterNow();
if (isServerViewInitialized &&
!wasRecentFailure &&
shouldRefresh() &&
(refreshImmediately || nextRefresh < System.currentTimeMillis())) {
// We need to do a refresh. Break out of the waiting loop.
break;
}
// lastFailure != 0L means exceptions happened before and some refresh work was not completed,
// so even if the ServerView is initialized, we can't let the broker complete initialization yet.
if (isServerViewInitialized && lastFailure == 0L) {
// Server view is initialized, but we don't need to do a refresh. Could happen if there are
// no segments in the system yet. Just mark us as initialized, then.
setInitializedAndReportInitTime(stopwatch);
}
// Wait some more, we'll wake up when it might be time to do another refresh.
lock.wait(Math.max(1, nextRefresh - System.currentTimeMillis()));
}
segmentsToRefresh.addAll(segmentsNeedingRefresh);
segmentsNeedingRefresh.clear();
// Mutable segments need a refresh every period, since new columns could be added dynamically.
segmentsNeedingRefresh.addAll(mutableSegments);
lastFailure = 0L;
lastRefresh = System.currentTimeMillis();
refreshImmediately = false;
}
refresh(segmentsToRefresh, dataSourcesToRebuild);
setInitializedAndReportInitTime(stopwatch);
}
catch (InterruptedException e) {
// Fall through.
throw e;
}
catch (Exception e) {
log.warn(e, "Metadata refresh failed, trying again soon.");
synchronized (lock) {
// Add our segments and datasources back to their refresh and rebuild lists.
segmentsNeedingRefresh.addAll(segmentsToRefresh);
dataSourcesNeedingRebuild.addAll(dataSourcesToRebuild);
lastFailure = System.currentTimeMillis();
}
}
}
}
catch (InterruptedException e) {
// Just exit.
}
catch (Throwable e) {
// Throwables that fall out to here (not caught by an inner try/catch) are potentially gnarly, like
// OOMEs. Anyway, let's just emit an alert and stop refreshing metadata.
log.makeAlert(e, "Metadata refresh failed permanently").emit();
throw e;
}
finally {
log.info("Metadata refresh stopped.");
}
}
/**
* Lifecycle start method.
*/
public abstract void start() throws InterruptedException;
/**
* Lifecycle stop method.
*/
public abstract void stop();
private void setInitializedAndReportInitTime(Stopwatch stopwatch)
{
// report the cache init time
if (initialized.getCount() == 1) {
long elapsedTime = stopwatch.elapsed(TimeUnit.MILLISECONDS);
emitter.emit(ServiceMetricEvent.builder().setMetric("metadatacache/init/time", elapsedTime));
log.info("%s initialized in [%,d] ms.", getClass().getSimpleName(), elapsedTime);
stopwatch.stop();
}
initialized.countDown();
}
public void refreshWaitCondition() throws InterruptedException
{
// noop
}
/**
* Refresh is executed only when there are segments or datasources needing refresh.
*/
@SuppressWarnings("GuardedBy")
protected boolean shouldRefresh()
{
return (!segmentsNeedingRefresh.isEmpty() || !dataSourcesNeedingRebuild.isEmpty());
}
public void awaitInitialization() throws InterruptedException
{
initialized.await();
}
/**
* Fetch schema for the given datasource.
*
* @param name datasource
*
* @return schema information for the given datasource
*/
@Nullable
public T getDatasource(String name)
{
return tables.get(name);
}
/**
* @return Map of datasource and corresponding schema information.
*/
public Map<String, T> getDataSourceInformationMap()
{
return ImmutableMap.copyOf(tables);
}
/**
* @return Set of datasources for which schema information is cached.
*/
public Set<String> getDatasourceNames()
{
return tables.keySet();
}
/**
* Get metadata for all the cached segments, which includes information like RowSignature, realtime & numRows etc.
*
* @return Map of segmentId and corresponding metadata.
*/
public Map<SegmentId, AvailableSegmentMetadata> getSegmentMetadataSnapshot()
{
final Map<SegmentId, AvailableSegmentMetadata> segmentMetadata = Maps.newHashMapWithExpectedSize(getTotalSegments());
final Iterator<AvailableSegmentMetadata> it = iterateSegmentMetadata();
while (it.hasNext()) {
final AvailableSegmentMetadata availableSegmentMetadata = it.next();
segmentMetadata.put(availableSegmentMetadata.getSegment().getId(), availableSegmentMetadata);
}
return segmentMetadata;
}
/**
* Get metadata for all the cached segments, which includes information like RowSignature, realtime & numRows etc.
* This is a lower-overhead method than {@link #getSegmentMetadataSnapshot()}.
*
* @return iterator of metadata.
*/
public Iterator<AvailableSegmentMetadata> iterateSegmentMetadata()
{
return FluentIterable.from(segmentMetadataInfo.values())
.transformAndConcat(Map::values)
.iterator();
}
/**
* Get metadata for the specified segment, which includes information like RowSignature, realtime & numRows.
*
* @param datasource segment datasource
* @param segmentId segment Id
*
* @return Metadata information for the given segment
*/
@Nullable
public AvailableSegmentMetadata getAvailableSegmentMetadata(String datasource, SegmentId segmentId)
{
final ConcurrentSkipListMap<SegmentId, AvailableSegmentMetadata> dataSourceMap =
segmentMetadataInfo.get(datasource);
if (dataSourceMap == null) {
return null;
} else {
return dataSourceMap.get(segmentId);
}
}
/**
* Returns the total number of segments. This method intentionally doesn't use the lock, to avoid expensive contention.
* As a result, the returned value might be inexact.
*/
public int getTotalSegments()
{
return totalSegments;
}
/**
* The child classes must override this method with the logic to build and cache table schema.
*
* @param segmentsToRefresh segments for which the schema might have changed
* @param dataSourcesToRebuild datasources for which the schema might have changed
* @throws IOException if an error occurs while querying segment schema from data nodes and tasks
*/
public abstract void refresh(Set<SegmentId> segmentsToRefresh, Set<String> dataSourcesToRebuild) throws IOException;
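/**
* Timeline callback: records that {@code segment} is now served by {@code server}. New segments get a placeholder
* {@link AvailableSegmentMetadata} and (unless they are tombstones) are marked for refresh; known segments get
* their replica set and realtime flag updated.
*/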
@VisibleForTesting
public void addSegment(final DruidServerMetadata server, final DataSegment segment)
{
// Get lock first so that we won't wait in ConcurrentMap.compute().
synchronized (lock) {
// someday we could hypothetically remove broker special casing, whenever BrokerServerView supports tracking
// broker-served segments in the timeline, to ensure that the removeSegment event is triggered accurately
if (server.getType().equals(ServerType.BROKER)) {
// a segment on a broker means a broadcast datasource; skip the metadata because we'll also see this segment on a
// historical, but mark the datasource for refresh because it needs to be globalized
markDataSourceAsNeedRebuild(segment.getDataSource());
} else {
segmentMetadataInfo.compute(
segment.getDataSource(),
(datasource, segmentsMap) -> {
if (segmentsMap == null) {
segmentsMap = new ConcurrentSkipListMap<>(SEGMENT_ORDER);
}
segmentsMap.compute(
segment.getId(),
(segmentId, segmentMetadata) -> {
if (segmentMetadata == null) {
// Unknown segment.
totalSegments++;
// segmentReplicatable is used to determine if segments are served by historical or realtime servers
long isRealtime = server.isSegmentReplicationTarget() ? 0 : 1;
segmentMetadata = AvailableSegmentMetadata
.builder(segment, isRealtime, ImmutableSet.of(server), null, DEFAULT_NUM_ROWS) // Added without needing a refresh
.build();
if (segment.isTombstone()) {
log.debug("Skipping refresh for tombstone segment.");
} else {
markSegmentAsNeedRefresh(segment.getId());
}
if (!server.isSegmentReplicationTarget()) {
log.debug("Added new mutable segment [%s].", segment.getId());
markSegmentAsMutable(segment.getId());
} else {
log.debug("Added new immutable segment [%s].", segment.getId());
}
} else {
// We know this segment.
final Set<DruidServerMetadata> segmentServers = segmentMetadata.getReplicas();
final ImmutableSet<DruidServerMetadata> servers = new ImmutableSet.Builder<DruidServerMetadata>()
.addAll(segmentServers)
.add(server)
.build();
segmentMetadata = AvailableSegmentMetadata
.from(segmentMetadata)
.withReplicas(servers)
.withRealtime(recomputeIsRealtime(servers))
.build();
if (server.isSegmentReplicationTarget()) {
// If a segment shows up on a replicatable (historical) server at any point, then it must be immutable,
// even if it's also available on non-replicatable (realtime) servers.
unmarkSegmentAsMutable(segment.getId());
log.debug("Segment[%s] has become immutable.", segment.getId());
}
}
assert segmentMetadata != null;
return segmentMetadata;
}
);
return segmentsMap;
}
);
}
if (!tables.containsKey(segment.getDataSource())) {
refreshImmediately = true;
}
lock.notifyAll();
}
}
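/**
* Timeline callback: removes {@code segment} from the cache and marks its datasource for a schema rebuild, or
* removes the datasource entirely if no segments remain.
*/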
@VisibleForTesting
public void removeSegment(final DataSegment segment)
{
// Get lock first so that we won't wait in ConcurrentMap.compute().
synchronized (lock) {
log.debug("Segment [%s] is gone.", segment.getId());
segmentsNeedingRefresh.remove(segment.getId());
unmarkSegmentAsMutable(segment.getId());
segmentMetadataInfo.compute(
segment.getDataSource(),
(dataSource, segmentsMap) -> {
if (segmentsMap == null) {
log.warn("Unknown segment [%s] was removed from the cluster. Ignoring this event.", segment.getId());
return null;
} else {
if (segmentsMap.remove(segment.getId()) == null) {
log.warn("Unknown segment [%s] was removed from the cluster. Ignoring this event.", segment.getId());
} else {
totalSegments--;
}
removeSegmentAction(segment.getId());
if (segmentsMap.isEmpty()) {
tables.remove(segment.getDataSource());
log.info("dataSource [%s] no longer exists, all metadata removed.", segment.getDataSource());
return null;
} else {
markDataSourceAsNeedRebuild(segment.getDataSource());
return segmentsMap;
}
}
}
);
lock.notifyAll();
}
}
/**
* This method should be overridden by child classes to execute any action on segment removal.
*/
protected abstract void removeSegmentAction(SegmentId segmentId);
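/**
* Timeline callback: records that {@code segment} is no longer served by {@code server}, updating the segment's
* replica set and realtime flag.
*/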
@VisibleForTesting
public void removeServerSegment(final DruidServerMetadata server, final DataSegment segment)
{
// Get lock first so that we won't wait in ConcurrentMap.compute().
synchronized (lock) {
log.debug("Segment [%s] is gone from server [%s]", segment.getId(), server.getName());
segmentMetadataInfo.compute(
segment.getDataSource(),
(datasource, knownSegments) -> {
if (knownSegments == null) {
log.warn(
"Unknown segment [%s] is removed from server [%s]. Ignoring this event",
segment.getId(),
server.getHost()
);
return null;
}
if (server.getType().equals(ServerType.BROKER)) {
// for brokers, if the segment drops from all historicals before the broker this could be null.
if (!knownSegments.isEmpty()) {
// a segment on a broker means a broadcast datasource; skip the metadata because we'll also see this segment on a
// historical, but mark the datasource for refresh because it might no longer be broadcast
markDataSourceAsNeedRebuild(segment.getDataSource());
}
} else {
knownSegments.compute(
segment.getId(),
(segmentId, segmentMetadata) -> {
if (segmentMetadata == null) {
log.warn(
"Unknown segment [%s] is removed from server [%s]. Ignoring this event",
segment.getId(),
server.getHost()
);
return null;
} else {
final Set<DruidServerMetadata> segmentServers = segmentMetadata.getReplicas();
final ImmutableSet<DruidServerMetadata> servers = FluentIterable
.from(segmentServers)
.filter(Predicates.not(Predicates.equalTo(server)))
.toSet();
return AvailableSegmentMetadata
.from(segmentMetadata)
.withReplicas(servers)
.withRealtime(recomputeIsRealtime(servers))
.build();
}
}
);
}
if (knownSegments.isEmpty()) {
return null;
} else {
return knownSegments;
}
}
);
lock.notifyAll();
}
}
protected void markSegmentAsNeedRefresh(SegmentId segmentId)
{
synchronized (lock) {
segmentsNeedingRefresh.add(segmentId);
}
}
private void markSegmentAsMutable(SegmentId segmentId)
{
synchronized (lock) {
mutableSegments.add(segmentId);
}
}
protected void unmarkSegmentAsMutable(SegmentId segmentId)
{
synchronized (lock) {
mutableSegments.remove(segmentId);
}
}
@VisibleForTesting
public void markDataSourceAsNeedRebuild(String datasource)
{
synchronized (lock) {
dataSourcesNeedingRebuild.add(datasource);
}
}
/**
* Attempt to refresh "segmentSignatures" for a set of segments. Returns the set of segments actually refreshed,
* which may be a subset of the asked-for set.
*/
@VisibleForTesting
public Set<SegmentId> refreshSegments(final Set<SegmentId> segments) throws IOException
{
final Set<SegmentId> retVal = new HashSet<>();
// Organize segments by datasource.
final Map<String, TreeSet<SegmentId>> segmentMap = new TreeMap<>();
for (SegmentId segmentId : segments) {
segmentMap.computeIfAbsent(segmentId.getDataSource(), x -> new TreeSet<>(SEGMENT_ORDER))
.add(segmentId);
}
for (Map.Entry<String, TreeSet<SegmentId>> entry : segmentMap.entrySet()) {
final String dataSource = entry.getKey();
retVal.addAll(refreshSegmentsForDataSource(dataSource, entry.getValue()));
}
return retVal;
}
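/**
* Recomputes the realtime flag for a segment from the servers currently serving it: returns 0 if the set is empty
* or contains any historical server, and 1 otherwise.
*/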
private long recomputeIsRealtime(ImmutableSet<DruidServerMetadata> servers)
{
if (servers.isEmpty()) {
return 0;
}
final Optional<DruidServerMetadata> historicalServer = servers
.stream()
// Ideally, this filter should have checked whether it's a broadcast segment loaded in brokers.
// However, we don't currently track the broadcast segments loaded in brokers, so this filter is still valid.
// See addSegment(), removeServerSegment(), and removeSegment()
.filter(metadata -> metadata.getType().equals(ServerType.HISTORICAL))
.findAny();
// if there is any historical server in the replicas, isRealtime flag should be unset
return historicalServer.isPresent() ? 0 : 1;
}
/**
* Attempt to refresh "segmentSignatures" for a set of segments for a particular dataSource. Returns the set of
* segments actually refreshed, which may be a subset of the asked-for set.
*/
public Set<SegmentId> refreshSegmentsForDataSource(final String dataSource, final Set<SegmentId> segments)
throws IOException
{
final Stopwatch stopwatch = Stopwatch.createStarted();
if (!segments.stream().allMatch(segmentId -> segmentId.getDataSource().equals(dataSource))) {
// Sanity check. We definitely expect this to pass.
throw new ISE("'segments' must all match 'dataSource'!");
}
log.debug("Refreshing metadata for datasource[%s].", dataSource);
final ServiceMetricEvent.Builder builder =
new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource);
emitter.emit(builder.setMetric("metadatacache/refresh/count", segments.size()));
// Segment id string -> SegmentId object.
final Map<String, SegmentId> segmentIdMap = Maps.uniqueIndex(segments, SegmentId::toString);
final Set<SegmentId> retVal = new HashSet<>();
logSegmentsToRefresh(dataSource, segments);
final Sequence<SegmentAnalysis> sequence = runSegmentMetadataQuery(
Iterables.limit(segments, MAX_SEGMENTS_PER_QUERY)
);
Yielder<SegmentAnalysis> yielder = Yielders.each(sequence);
try {
while (!yielder.isDone()) {
final SegmentAnalysis analysis = yielder.get();
final SegmentId segmentId = segmentIdMap.get(analysis.getId());
if (segmentId == null) {
log.warn("Got analysis for segment [%s] we didn't ask for, ignoring.", analysis.getId());
} else {
final RowSignature rowSignature = analysisToRowSignature(analysis);
log.debug("Segment[%s] has signature[%s].", segmentId, rowSignature);
if (segmentMetadataQueryResultHandler(dataSource, segmentId, rowSignature, analysis)) {
retVal.add(segmentId);
}
}
yielder = yielder.next(null);
}
}
finally {
yielder.close();
}
long refreshDurationMillis = stopwatch.elapsed(TimeUnit.MILLISECONDS);
emitter.emit(builder.setMetric("metadatacache/refresh/time", refreshDurationMillis));
log.debug(
"Refreshed metadata for datasource [%s] in %,d ms (%d segments queried, %d segments left).",
dataSource,
refreshDurationMillis,
retVal.size(),
segments.size() - retVal.size()
);
return retVal;
}
/**
* Log the details of the segments to be refreshed for a datasource, for debugging purposes.
*/
void logSegmentsToRefresh(String dataSource, Set<SegmentId> ids)
{
// no-op
}
/**
* Action to be executed on the result of a segment metadata query.
* Returns whether the segment metadata was updated.
*/
protected boolean segmentMetadataQueryResultHandler(
String dataSource,
SegmentId segmentId,
RowSignature rowSignature,
SegmentAnalysis analysis
)
{
AtomicBoolean added = new AtomicBoolean(false);
segmentMetadataInfo.compute(
dataSource,
(datasourceKey, dataSourceSegments) -> {
if (dataSourceSegments == null) {
// Datasource may have been removed or become unavailable while this refresh was ongoing.
log.warn(
"No segment map found with datasource [%s], skipping refresh of segment [%s]",
datasourceKey,
segmentId
);
return null;
} else {
dataSourceSegments.compute(
segmentId,
(segmentIdKey, segmentMetadata) -> {
if (segmentMetadata == null) {
log.warn("No segment [%s] found, skipping refresh", segmentId);
return null;
} else {
final AvailableSegmentMetadata updatedSegmentMetadata = AvailableSegmentMetadata
.from(segmentMetadata)
.withRowSignature(rowSignature)
.withNumRows(analysis.getNumRows())
.build();
added.set(true);
return updatedSegmentMetadata;
}
}
);
if (dataSourceSegments.isEmpty()) {
return null;
} else {
return dataSourceSegments;
}
}
}
);
return added.get();
}
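/**
* Builds the {@link RowSignature} for a datasource by merging the signatures of all of its cached segments using
* the configured {@link ColumnTypeMergePolicy}. Returns null if the datasource has no cached segments.
*/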
@VisibleForTesting
@Nullable
public RowSignature buildDataSourceRowSignature(final String dataSource)
{
ConcurrentSkipListMap<SegmentId, AvailableSegmentMetadata> segmentsMap = segmentMetadataInfo.get(dataSource);
// Preserve order.
final Map<String, ColumnType> columnTypes = new LinkedHashMap<>();
if (segmentsMap != null && !segmentsMap.isEmpty()) {
for (AvailableSegmentMetadata availableSegmentMetadata : segmentsMap.values()) {
final RowSignature rowSignature = availableSegmentMetadata.getRowSignature();
if (rowSignature != null) {
for (String column : rowSignature.getColumnNames()) {
final ColumnType columnType =
rowSignature.getColumnType(column)
.orElseThrow(() -> new ISE("Encountered null type for column [%s]", column));
columnTypes.compute(column, (c, existingType) -> columnTypeMergePolicy.merge(existingType, columnType));
}
}
}
} else {
// table has no segments
return null;
}
final RowSignature.Builder builder = RowSignature.builder();
columnTypes.forEach(builder::add);
return builder.build();
}
@VisibleForTesting
public TreeSet<SegmentId> getSegmentsNeedingRefresh()
{
synchronized (lock) {
return segmentsNeedingRefresh;
}
}
@VisibleForTesting
public TreeSet<SegmentId> getMutableSegments()
{
synchronized (lock) {
return mutableSegments;
}
}
@VisibleForTesting
public Set<String> getDataSourcesNeedingRebuild()
{
synchronized (lock) {
return dataSourcesNeedingRebuild;
}
}
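/**
* Whether segment metadata queries issued by {@link #runSegmentMetadataQuery} should also analyze aggregators.
* Defaults to false; child classes may override.
*/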
protected boolean fetchAggregatorsInSegmentMetadataQuery()
{
return false;
}
/**
* Execute a SegmentMetadata query and return a {@link Sequence} of {@link SegmentAnalysis}.
*
* @param segments Iterable of {@link SegmentId} objects that are subject of the SegmentMetadata query.
* @return {@link Sequence} of {@link SegmentAnalysis} objects
*/
@VisibleForTesting
public Sequence<SegmentAnalysis> runSegmentMetadataQuery(
final Iterable<SegmentId> segments
)
{
// Sanity check: getOnlyElement of a set, to ensure all segments have the same datasource.
final String dataSource = Iterables.getOnlyElement(
StreamSupport.stream(segments.spliterator(), false)
.map(SegmentId::getDataSource).collect(Collectors.toSet())
);
final MultipleSpecificSegmentSpec querySegmentSpec = new MultipleSpecificSegmentSpec(
StreamSupport.stream(segments.spliterator(), false)
.map(SegmentId::toDescriptor).collect(Collectors.toList())
);
final SegmentMetadataQuery segmentMetadataQuery = new SegmentMetadataQuery(
new TableDataSource(dataSource),
querySegmentSpec,
new AllColumnIncluderator(),
false,
// disable the parallel merge because we don't care about the merge and don't want to consume its resources
QueryContexts.override(
internalQueryConfig.getContext(),
QueryContexts.BROKER_PARALLEL_MERGE_KEY,
false
),
fetchAggregatorsInSegmentMetadataQuery()
? EnumSet.of(SegmentMetadataQuery.AnalysisType.AGGREGATORS)
: EnumSet.noneOf(SegmentMetadataQuery.AnalysisType.class),
false,
null,
null // we don't care about merging strategy because merge is false
);
return queryLifecycleFactory
.factorize()
.runSimple(segmentMetadataQuery, escalator.createEscalatedAuthenticationResult(), Access.OK).getResults();
}
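/**
* Converts a {@link SegmentAnalysis} into a {@link RowSignature}, skipping columns with analysis errors and
* falling back to the legacy type string (or a complex type) when the type signature is missing. The result is
* interned to reduce memory footprint.
*/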
@VisibleForTesting
static RowSignature analysisToRowSignature(final SegmentAnalysis analysis)
{
final RowSignature.Builder rowSignatureBuilder = RowSignature.builder();
for (Map.Entry<String, ColumnAnalysis> entry : analysis.getColumns().entrySet()) {
if (entry.getValue().isError()) {
// Skip columns with analysis errors.
continue;
}
ColumnType valueType = entry.getValue().getTypeSignature();
// this shouldn't happen, but if it does, first try to fall back to legacy type information field in case
// standard upgrade order was not followed for 0.22 to 0.23+, and if that also fails, then assume types are some
// flavor of COMPLEX.
if (valueType == null) {
// at some point in the future this can be simplified to the contents of the catch clause here, once the
// likelihood of upgrading from some version lower than 0.23 is low
try {
valueType = ColumnType.fromString(entry.getValue().getType());
if (valueType == null) {
valueType = ColumnType.ofComplex(entry.getValue().getType());
}
}
catch (IllegalArgumentException ignored) {
valueType = ColumnType.UNKNOWN_COMPLEX;
}
}
rowSignatureBuilder.add(entry.getKey(), valueType);
}
return ROW_SIGNATURE_INTERNER.intern(rowSignatureBuilder.build());
}
/**
* This method is not thread-safe and must be used only in unit tests.
*/
@VisibleForTesting
public void setAvailableSegmentMetadata(final SegmentId segmentId, final AvailableSegmentMetadata availableSegmentMetadata)
{
final ConcurrentSkipListMap<SegmentId, AvailableSegmentMetadata> dataSourceSegments = segmentMetadataInfo
.computeIfAbsent(
segmentId.getDataSource(),
k -> new ConcurrentSkipListMap<>(SEGMENT_ORDER)
);
if (dataSourceSegments.put(segmentId, availableSegmentMetadata) == null) {
totalSegments++;
}
}
/**
* This is a helper method for unit tests to emulate heavy work done with {@link #lock}.
* It must be used only in unit tests.
*/
@VisibleForTesting
protected void doInLock(Runnable runnable)
{
synchronized (lock) {
runnable.run();
}
}
/**
* ColumnTypeMergePolicy defines the rules of which type to use when faced with the possibility of different types
* for the same column from segment to segment. It is used to help compute a {@link RowSignature} for a table in
* Druid based on the segment metadata of all segments, merging the types of each column encountered to end up with
* a single type to represent it globally.
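*
* A brief illustrative example (assumes the standard {@code ColumnType} constants; under the
* {@code leastRestrictive} policy LONG and DOUBLE merge to DOUBLE, while the {@code latestInterval} policy keeps
* the first type it sees):
* <pre>{@code
* ColumnTypeMergePolicy policy = ColumnTypeMergePolicy.fromString("leastRestrictive");
* ColumnType merged = policy.merge(ColumnType.LONG, ColumnType.DOUBLE); // DOUBLE
* }</pre>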
*/
@FunctionalInterface
public interface ColumnTypeMergePolicy
{
ColumnType merge(ColumnType existingType, ColumnType newType);
@JsonCreator
static ColumnTypeMergePolicy fromString(String type)
{
if (LeastRestrictiveTypeMergePolicy.NAME.equalsIgnoreCase(type)) {
return LeastRestrictiveTypeMergePolicy.INSTANCE;
}
if (FirstTypeMergePolicy.NAME.equalsIgnoreCase(type)) {
return FirstTypeMergePolicy.INSTANCE;
}
throw new IAE("Unknown type [%s]", type);
}
}
/**
* Classic logic: we use the first type we encounter. This policy is effectively 'newest first' because we iterate
* segments starting from the most recent time chunk, so this typically results in the most recently used type being
* chosen, at least for systems that are continuously updated with 'current' data.
*
* Since {@link ColumnTypeMergePolicy} is used to compute the SQL schema, at least in systems whose SQL schemas are
* partially or fully computed by this cache, this merge policy can result in query-time errors when incompatible
* types are mixed and the chosen type is more restrictive than the types of some segments. If data is likely to vary
* in type across segments, consider using {@link LeastRestrictiveTypeMergePolicy} instead.
*/
public static class FirstTypeMergePolicy implements ColumnTypeMergePolicy
{
public static final String NAME = "latestInterval";
private static final FirstTypeMergePolicy INSTANCE = new FirstTypeMergePolicy();
@Override
public ColumnType merge(ColumnType existingType, ColumnType newType)
{
if (existingType == null) {
return newType;
}
if (newType == null) {
return existingType;
}
// if any are json, then all are json
if (ColumnType.NESTED_DATA.equals(newType) || ColumnType.NESTED_DATA.equals(existingType)) {
return ColumnType.NESTED_DATA;
}
// "existing type" is the 'newest' type, since we iterate the segments list by newest start time
return existingType;
}
@Override
public int hashCode()
{
return Objects.hash(NAME);
}
@Override
public boolean equals(Object o)
{
if (this == o) {
return true;
}
return o != null && getClass() == o.getClass();
}
@Override
public String toString()
{
return NAME;
}
}
/**
* Resolves types using {@link ColumnType#leastRestrictiveType(ColumnType, ColumnType)} to find the ColumnType that
* can best represent all data contained across all segments.
*/
public static class LeastRestrictiveTypeMergePolicy implements ColumnTypeMergePolicy
{
public static final String NAME = "leastRestrictive";
private static final LeastRestrictiveTypeMergePolicy INSTANCE = new LeastRestrictiveTypeMergePolicy();
@Override
public ColumnType merge(ColumnType existingType, ColumnType newType)
{
try {
return ColumnType.leastRestrictiveType(existingType, newType);
}
catch (Types.IncompatibleTypeException incompatibleTypeException) {
// fall back to first encountered type if they are not compatible for some reason
return FirstTypeMergePolicy.INSTANCE.merge(existingType, newType);
}
}
@Override
public int hashCode()
{
return Objects.hash(NAME);
}
@Override
public boolean equals(Object o)
{
if (this == o) {
return true;
}
return o != null && getClass() == o.getClass();
}
@Override
public String toString()
{
return NAME;
}
}
}