/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.metadata;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.base.Supplier;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.util.concurrent.Futures;
import com.google.errorprone.annotations.concurrent.GuardedBy;
import com.google.inject.Inject;
import org.apache.druid.client.DataSourcesSnapshot;
import org.apache.druid.client.ImmutableDruidDataSource;
import org.apache.druid.error.DruidException;
import org.apache.druid.error.InvalidInput;
import org.apache.druid.guice.ManageLifecycle;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.JodaUtils;
import org.apache.druid.java.util.common.MapUtils;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.Stopwatch;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.concurrent.Execs;
import org.apache.druid.java.util.common.lifecycle.LifecycleStart;
import org.apache.druid.java.util.common.lifecycle.LifecycleStop;
import org.apache.druid.java.util.common.parsers.CloseableIterator;
import org.apache.druid.java.util.emitter.EmittingLogger;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.segment.SchemaPayload;
import org.apache.druid.segment.SegmentMetadata;
import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig;
import org.apache.druid.segment.metadata.SegmentSchemaCache;
import org.apache.druid.server.http.DataSegmentPlus;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.Partitions;
import org.apache.druid.timeline.SegmentId;
import org.apache.druid.timeline.SegmentTimeline;
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
import org.joda.time.DateTime;
import org.joda.time.Duration;
import org.joda.time.Interval;
import org.skife.jdbi.v2.BaseResultSetMapper;
import org.skife.jdbi.v2.Batch;
import org.skife.jdbi.v2.FoldController;
import org.skife.jdbi.v2.Handle;
import org.skife.jdbi.v2.Query;
import org.skife.jdbi.v2.StatementContext;
import org.skife.jdbi.v2.TransactionCallback;
import org.skife.jdbi.v2.TransactionStatus;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.stream.Collectors;
/**
 * An implementation of {@link SegmentsMetadataManager} backed by the SQL metadata store.
 */
@ManageLifecycle
public class SqlSegmentsMetadataManager implements SegmentsMetadataManager
{
private static final EmittingLogger log = new EmittingLogger(SqlSegmentsMetadataManager.class);
/**
* Marker interface for objects stored in {@link #latestDatabasePoll}. See the comment for that field for details.
*/
private interface DatabasePoll
{}
/** Represents periodic {@link #poll}s happening from {@link #exec}. */
@VisibleForTesting
static class PeriodicDatabasePoll implements DatabasePoll
{
/**
     * This future allows waiting until {@link #dataSourcesSnapshot} is initialized in the first {@link #poll()}
* happening since {@link #startPollingDatabasePeriodically()} is called for the first time, or since the last
* visible (in happens-before terms) call to {@link #startPollingDatabasePeriodically()} in case of Coordinator's
* leadership changes.
*/
    final CompletableFuture<Void> firstPollCompletionFuture = new CompletableFuture<>();
long lastPollStartTimestampInMs = -1;
}
/**
   * Represents an on-demand {@link #poll} initiated during periods when SqlSegmentsMetadataManager doesn't poll the
   * database periodically.
*/
@VisibleForTesting
static class OnDemandDatabasePoll implements DatabasePoll
{
final long initiationTimeNanos = System.nanoTime();
    final CompletableFuture<Void> pollCompletionFuture = new CompletableFuture<>();
long nanosElapsedFromInitiation()
{
return System.nanoTime() - initiationTimeNanos;
}
}
/**
   * Used to synchronize {@link #startPollingDatabasePeriodically}, {@link #stopPollingDatabasePeriodically}, {@link
   * #poll}, and {@link #isPollingDatabasePeriodically}. These methods need to be synchronized to prevent two
   * different threads from calling them at the same time, which is possible if the Coordinator gains and drops
   * leadership repeatedly in quick succession.
*
* This lock is also used to synchronize {@link #useLatestIfWithinDelayOrPerformNewDatabasePoll} for times when SqlSegmentsMetadataManager
* is not polling the database periodically (in other words, when the Coordinator is not the leader).
*/
private final ReentrantReadWriteLock startStopPollLock = new ReentrantReadWriteLock();
/**
   * Used to ensure that {@link #poll()} is never run concurrently. This should already be the case (at least in
   * production code), where {@link #poll()} is called only from the task created in {@link #createPollTaskForStartOrder}
   * and scheduled in a single-threaded {@link #exec}, so this lock is an additional safety net in case there are bugs
   * in the code, and for tests, where {@link #poll()} is called from outside code.
*
* Not using {@link #startStopPollLock}.writeLock() in order to still be able to run {@link #poll()} concurrently
* with {@link #isPollingDatabasePeriodically()}.
*/
private final Object pollLock = new Object();
private final ObjectMapper jsonMapper;
private final Duration periodicPollDelay;
  private final Supplier<MetadataStorageTablesConfig> dbTables;
private final SQLMetadataConnector connector;
private final SegmentSchemaCache segmentSchemaCache;
private final ServiceEmitter serviceEmitter;
private final CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig;
/**
* This field is made volatile to avoid "ghost secondary reads" that may result in NPE, see
* https://github.com/code-review-checklists/java-concurrency#safe-local-dcl (note that dataSourcesSnapshot resembles
   * a lazily initialized field). The alternative is to always read the field into a local variable first, but that is
   * too easy to forget to do.
   *
   * This field may be updated from {@link #exec}, or from whatever thread calls {@link #doOnDemandPoll} via {@link
* #useLatestIfWithinDelayOrPerformNewDatabasePoll()} via one of the public methods of SqlSegmentsMetadataManager.
*/
private volatile @MonotonicNonNull DataSourcesSnapshot dataSourcesSnapshot = null;
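  // Illustrative sketch (an addition, not part of the original source) of the "ghost secondary read" hazard the
  // javadoc above guards against; use(...) and "wiki" are placeholders. With two reads of the field, the second
  // read may observe a different value than the one the null check saw:
  //
  //   if (dataSourcesSnapshot != null) {                  // first read
  //     use(dataSourcesSnapshot.getDataSource("wiki"));   // second read: may differ
  //   }
  //
  // The safe pattern, used in replaceWithExistingSegmentIfPresent() below, is a single read into a local variable:
  //
  //   DataSourcesSnapshot snapshot = this.dataSourcesSnapshot;
  //   if (snapshot != null) {
  //     use(snapshot.getDataSource("wiki"));              // consistent with the null check
  //   }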
/**
   * The latest {@link DatabasePoll} represents {@link #poll()} calls which update {@link #dataSourcesSnapshot}, either
* periodically (see {@link PeriodicDatabasePoll}, {@link #startPollingDatabasePeriodically}, {@link
* #stopPollingDatabasePeriodically}) or "on demand" (see {@link OnDemandDatabasePoll}), when one of the methods that
* accesses {@link #dataSourcesSnapshot}'s state (such as {@link #getImmutableDataSourceWithUsedSegments}) is
* called when the Coordinator is not the leader and therefore SqlSegmentsMetadataManager isn't polling the database
* periodically.
*
   * Note that if there is a happens-before relationship between a call to {@link #startPollingDatabasePeriodically()}
   * (on the Coordinator's leadership change) and one of the methods accessing the {@link #dataSourcesSnapshot}'s state
   * in this class, the latter is guaranteed to await the initiated periodic poll. This is because when the latter
   * method calls {@link #useLatestSnapshotIfWithinDelay()} via {@link #useLatestIfWithinDelayOrPerformNewDatabasePoll},
   * it will see the latest {@link PeriodicDatabasePoll} value (stored in this field, latestDatabasePoll, in {@link
   * #startPollingDatabasePeriodically()}) and await its {@link PeriodicDatabasePoll#firstPollCompletionFuture}.
*
   * However, the guarantee explained above doesn't make any actual semantic difference, because both periodic and
   * on-demand database polls maintain the same invariant: results no older than {@link
   * #periodicPollDelay} are used. The main difference is in performance: since on-demand polls are irregular and happen
* in the context of the thread wanting to access the {@link #dataSourcesSnapshot}, that may cause delays in the
* logic. On the other hand, periodic polls are decoupled into {@link #exec} and {@link
* #dataSourcesSnapshot}-accessing methods should be generally "wait free" for database polls.
*
* The notion and the complexity of "on demand" database polls was introduced to simplify the interface of {@link
* SegmentsMetadataManager} and guarantee that it always returns consistent and relatively up-to-date data from methods
* like {@link #getImmutableDataSourceWithUsedSegments}, while avoiding excessive repetitive polls. The last part
* is achieved via "hooking on" other polls by awaiting on {@link PeriodicDatabasePoll#firstPollCompletionFuture} or
* {@link OnDemandDatabasePoll#pollCompletionFuture}, see {@link #useLatestIfWithinDelayOrPerformNewDatabasePoll} method
* implementation for details.
*
* Note: the overall implementation of periodic/on-demand polls is not completely optimal: for example, when the
* Coordinator just stopped leading, the latest periodic {@link #poll} (which is still "fresh") is not considered
* and a new on-demand poll is always initiated. This is done to simplify the implementation, while the efficiency
* during Coordinator leadership switches is not a priority.
*
* This field is {@code volatile} because it's checked and updated in a double-checked locking manner in {@link
* #useLatestIfWithinDelayOrPerformNewDatabasePoll()}.
*/
private volatile @Nullable DatabasePoll latestDatabasePoll = null;
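  // Sketch (an illustrative addition) of the check -> lock -> re-check -> poll flow that
  // useLatestIfWithinDelayOrPerformNewDatabasePoll() below applies to this field:
  //
  //   if (useLatestSnapshotIfWithinDelay()) {     // cheap volatile read + await, no lock
  //     return;
  //   }
  //   writeLock.lock();
  //   try {
  //     if (useLatestSnapshotIfWithinDelay()) {   // re-check: another thread may have polled meanwhile
  //       return;
  //     }
  //     latestDatabasePoll = new OnDemandDatabasePoll();
  //     doOnDemandPoll(...);                      // concurrent readers "hook on" via the poll's future
  //   }
  //   finally {
  //     writeLock.unlock();
  //   }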
/** Used to cancel periodic poll task in {@link #stopPollingDatabasePeriodically}. */
@GuardedBy("startStopPollLock")
  private @Nullable Future<?> periodicPollTaskFuture = null;
/** The number of times {@link #startPollingDatabasePeriodically} was called. */
@GuardedBy("startStopPollLock")
private long startPollingCount = 0;
/**
* Equal to the current {@link #startPollingCount} value if the SqlSegmentsMetadataManager is currently started; -1 if
* currently stopped.
*
* This field is used to implement a simple stamp mechanism instead of just a boolean "started" flag to prevent
* the theoretical situation of two or more tasks scheduled in {@link #startPollingDatabasePeriodically()} calling
* {@link #isPollingDatabasePeriodically()} and {@link #poll()} concurrently, if the sequence of {@link
* #startPollingDatabasePeriodically()} - {@link #stopPollingDatabasePeriodically()} - {@link
* #startPollingDatabasePeriodically()} actions occurs quickly.
*
* {@link SQLMetadataRuleManager} also has a similar issue.
*/
@GuardedBy("startStopPollLock")
private long currentStartPollingOrder = -1;
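  // Illustrative addition: the stamp mechanism described above in action. A poll task captures the order that was
  // current at scheduling time and re-checks it under the lock, so a task left over from before a
  // stopPollingDatabasePeriodically()/startPollingDatabasePeriodically() cycle no-ops instead of polling:
  //
  //   final long localStartOrder = currentStartPollingOrder;  // captured when the task is scheduled
  //   ...
  //   if (startOrder == currentStartPollingOrder) {
  //     poll();                                               // still the current polling cycle
  //   }                                                       // else: stale task, skip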
@GuardedBy("startStopPollLock")
private @Nullable ScheduledExecutorService exec = null;
  private Future<?> usedFlagLastUpdatedPopulationFuture;
@Inject
public SqlSegmentsMetadataManager(
ObjectMapper jsonMapper,
      Supplier<SegmentsMetadataManagerConfig> config,
      Supplier<MetadataStorageTablesConfig> dbTables,
SQLMetadataConnector connector,
SegmentSchemaCache segmentSchemaCache,
CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig,
ServiceEmitter serviceEmitter
)
{
this.jsonMapper = jsonMapper;
this.periodicPollDelay = config.get().getPollDuration().toStandardDuration();
this.dbTables = dbTables;
this.connector = connector;
this.segmentSchemaCache = segmentSchemaCache;
this.centralizedDatasourceSchemaConfig = centralizedDatasourceSchemaConfig;
this.serviceEmitter = serviceEmitter;
}
/**
* Don't confuse this method with {@link #startPollingDatabasePeriodically}. This is a lifecycle starting method to
* be executed just once for an instance of SqlSegmentsMetadataManager.
*/
@LifecycleStart
public void start()
{
ReentrantReadWriteLock.WriteLock lock = startStopPollLock.writeLock();
lock.lock();
try {
if (exec != null) {
return; // Already started
}
exec = Execs.scheduledSingleThreaded(StringUtils.encodeForFormat(getClass().getName()) + "-Exec--%d");
}
finally {
lock.unlock();
}
}
/**
* Don't confuse this method with {@link #stopPollingDatabasePeriodically}. This is a lifecycle stopping method to
* be executed just once for an instance of SqlSegmentsMetadataManager.
*/
@LifecycleStop
public void stop()
{
ReentrantReadWriteLock.WriteLock lock = startStopPollLock.writeLock();
lock.lock();
try {
      if (exec == null) {
        return; // Already stopped
      }
      exec.shutdownNow();
      exec = null;
}
finally {
lock.unlock();
}
}
@Override
public void startPollingDatabasePeriodically()
{
ReentrantReadWriteLock.WriteLock lock = startStopPollLock.writeLock();
lock.lock();
try {
if (exec == null) {
throw new IllegalStateException(getClass().getName() + " is not started");
}
if (isPollingDatabasePeriodically()) {
return;
}
PeriodicDatabasePoll periodicDatabasePoll = new PeriodicDatabasePoll();
latestDatabasePoll = periodicDatabasePoll;
startPollingCount++;
currentStartPollingOrder = startPollingCount;
final long localStartOrder = currentStartPollingOrder;
periodicPollTaskFuture = exec.scheduleWithFixedDelay(
createPollTaskForStartOrder(localStartOrder, periodicDatabasePoll),
0,
periodicPollDelay.getMillis(),
TimeUnit.MILLISECONDS
);
}
finally {
lock.unlock();
}
}
@Override
public void stopAsyncUsedFlagLastUpdatedUpdate()
{
    if (usedFlagLastUpdatedPopulationFuture != null
        && !usedFlagLastUpdatedPopulationFuture.isDone()
        && !usedFlagLastUpdatedPopulationFuture.isCancelled()) {
usedFlagLastUpdatedPopulationFuture.cancel(true);
}
}
@Override
public void populateUsedFlagLastUpdatedAsync()
{
ExecutorService executorService = Executors.newSingleThreadExecutor();
usedFlagLastUpdatedPopulationFuture = executorService.submit(
this::populateUsedFlagLastUpdated
);
}
/**
* Populate used_status_last_updated for unused segments whose current value for said column is NULL
*
* The updates are made incrementally.
*/
@VisibleForTesting
void populateUsedFlagLastUpdated()
{
final String segmentsTable = getSegmentsTable();
log.info(
"Populating column 'used_status_last_updated' with non-NULL values for unused segments in table[%s].",
segmentsTable
);
final int batchSize = 100;
int totalUpdatedEntries = 0;
// Update the rows in batches of size 100
while (true) {
      final List<String> segmentsToUpdate = new ArrayList<>(batchSize);
int numUpdatedRows;
try {
connector.retryWithHandle(
handle -> {
segmentsToUpdate.addAll(handle.createQuery(
StringUtils.format(
"SELECT id FROM %1$s WHERE used_status_last_updated IS NULL and used = :used %2$s",
segmentsTable,
connector.limitClause(batchSize)
)
).bind("used", false).mapTo(String.class).list());
return null;
}
);
if (segmentsToUpdate.isEmpty()) {
break;
}
numUpdatedRows = connector.retryWithHandle(
handle -> {
final Batch updateBatch = handle.createBatch();
final String sql = "UPDATE %1$s SET used_status_last_updated = '%2$s' WHERE id = '%3$s'";
String now = DateTimes.nowUtc().toString();
for (String id : segmentsToUpdate) {
updateBatch.add(StringUtils.format(sql, segmentsTable, now, id));
}
int[] results = updateBatch.execute();
return Arrays.stream(results).sum();
}
);
totalUpdatedEntries += numUpdatedRows;
}
catch (Exception e) {
log.warn(e, "Populating column 'used_status_last_updated' in table[%s] has failed. There may be unused segments with"
+ " NULL values for 'used_status_last_updated' that won't be killed!", segmentsTable);
return;
}
log.debug(
"Updated a batch of [%d] rows in table[%s] with a valid used_status_last_updated date",
segmentsToUpdate.size(), segmentsTable
);
// Do not wait if there are no more segments to update
if (segmentsToUpdate.size() == numUpdatedRows && numUpdatedRows < batchSize) {
break;
}
// Wait for some time before processing the next batch
try {
Thread.sleep(10000);
}
catch (InterruptedException e) {
log.info("Interrupted, exiting!");
Thread.currentThread().interrupt();
}
}
log.info(
"Populated column 'used_status_last_updated' in table[%s] in [%d] rows.",
segmentsTable, totalUpdatedEntries
);
}
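  // For illustration (an addition; the exact SQL depends on the dialect and on how connector.limitClause() renders.
  // A MySQL-style LIMIT and a hypothetical table name "druid_segments" are assumed here), one iteration of the loop
  // above issues statements shaped roughly like:
  //
  //   SELECT id FROM druid_segments
  //   WHERE used_status_last_updated IS NULL AND used = false LIMIT 100;
  //
  //   UPDATE druid_segments SET used_status_last_updated = '2024-01-01T00:00:00.000Z'
  //   WHERE id = '<each id from the SELECT>';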
private Runnable createPollTaskForStartOrder(long startOrder, PeriodicDatabasePoll periodicDatabasePoll)
{
return () -> {
      // If the latest poll was an OnDemandDatabasePoll that started less than periodicPollDelay ago,
      // wait for (periodicPollDelay - timeElapsedSinceTheLatestOnDemandPollStarted), then check again.
try {
long periodicPollDelayNanos = TimeUnit.MILLISECONDS.toNanos(periodicPollDelay.getMillis());
while (latestDatabasePoll != null
&& latestDatabasePoll instanceof OnDemandDatabasePoll
&& ((OnDemandDatabasePoll) latestDatabasePoll).nanosElapsedFromInitiation() < periodicPollDelayNanos) {
long sleepNano = periodicPollDelayNanos
- ((OnDemandDatabasePoll) latestDatabasePoll).nanosElapsedFromInitiation();
TimeUnit.NANOSECONDS.sleep(sleepNano);
}
}
catch (Exception e) {
log.debug(e, "Exception found while waiting for next periodic poll");
}
// poll() is synchronized together with startPollingDatabasePeriodically(), stopPollingDatabasePeriodically() and
// isPollingDatabasePeriodically() to ensure that when stopPollingDatabasePeriodically() exits, poll() won't
// actually run anymore after that (it could only enter the synchronized section and exit immediately because the
      // startOrder doesn't match the new currentStartPollingOrder). It's needed to avoid flakiness in
// SqlSegmentsMetadataManagerTest. See https://github.com/apache/druid/issues/6028
ReentrantReadWriteLock.ReadLock lock = startStopPollLock.readLock();
lock.lock();
try {
if (startOrder == currentStartPollingOrder) {
periodicDatabasePoll.lastPollStartTimestampInMs = System.currentTimeMillis();
poll();
periodicDatabasePoll.firstPollCompletionFuture.complete(null);
latestDatabasePoll = periodicDatabasePoll;
} else {
log.debug("startOrder = currentStartPollingOrder = %d, skipping poll()", startOrder);
}
}
catch (Throwable t) {
log.makeAlert(t, "Uncaught exception in %s's polling thread", SqlSegmentsMetadataManager.class).emit();
        // Swallow the exception, so that scheduled polling goes on. Leave firstPollCompletionFuture uncompleted
        // for now, so that it may be completed during the next poll.
if (!(t instanceof Exception)) {
          // Don't try to swallow a Throwable which is not an Exception (that is, an Error).
periodicDatabasePoll.firstPollCompletionFuture.completeExceptionally(t);
throw t;
}
}
finally {
lock.unlock();
}
};
}
@Override
public boolean isPollingDatabasePeriodically()
{
// isPollingDatabasePeriodically() is synchronized together with startPollingDatabasePeriodically(),
// stopPollingDatabasePeriodically() and poll() to ensure that the latest currentStartPollingOrder is always
// visible. readLock should be used to avoid unexpected performance degradation of DruidCoordinator.
ReentrantReadWriteLock.ReadLock lock = startStopPollLock.readLock();
lock.lock();
try {
return currentStartPollingOrder >= 0;
}
finally {
lock.unlock();
}
}
@Override
public void stopPollingDatabasePeriodically()
{
ReentrantReadWriteLock.WriteLock lock = startStopPollLock.writeLock();
lock.lock();
try {
if (!isPollingDatabasePeriodically()) {
return;
}
periodicPollTaskFuture.cancel(false);
latestDatabasePoll = null;
      // NOT nulling dataSourcesSnapshot, so that the latest polled data can still be queried even when this
      // SegmentsMetadataManager object is stopped.
currentStartPollingOrder = -1;
}
finally {
lock.unlock();
}
}
private void useLatestIfWithinDelayOrPerformNewDatabasePoll()
{
// Double-checked locking with useLatestSnapshotIfWithinDelay() call playing the role of the "check".
if (useLatestSnapshotIfWithinDelay()) {
return;
}
ReentrantReadWriteLock.WriteLock lock = startStopPollLock.writeLock();
lock.lock();
try {
if (useLatestSnapshotIfWithinDelay()) {
return;
}
OnDemandDatabasePoll onDemandDatabasePoll = new OnDemandDatabasePoll();
this.latestDatabasePoll = onDemandDatabasePoll;
doOnDemandPoll(onDemandDatabasePoll);
}
finally {
lock.unlock();
}
}
/**
   * This method returns true without waiting for a database poll if the latest {@link DatabasePoll} is a
   * {@link PeriodicDatabasePoll} that has completed its first poll, or an {@link OnDemandDatabasePoll} that was
   * initiated no longer than {@link #periodicPollDelay} before the current time.
   * This method does wait until completion if the latest {@link DatabasePoll} is a
   * {@link PeriodicDatabasePoll} that has not completed its first poll, or an {@link OnDemandDatabasePoll} that is
   * still in the process of polling the database.
   * This means that any method using this check may read from a snapshot that is
   * up to {@link SqlSegmentsMetadataManager#periodicPollDelay} old.
*/
@VisibleForTesting
boolean useLatestSnapshotIfWithinDelay()
{
DatabasePoll latestDatabasePoll = this.latestDatabasePoll;
if (latestDatabasePoll instanceof PeriodicDatabasePoll) {
Futures.getUnchecked(((PeriodicDatabasePoll) latestDatabasePoll).firstPollCompletionFuture);
return true;
}
if (latestDatabasePoll instanceof OnDemandDatabasePoll) {
long periodicPollDelayNanos = TimeUnit.MILLISECONDS.toNanos(periodicPollDelay.getMillis());
OnDemandDatabasePoll latestOnDemandPoll = (OnDemandDatabasePoll) latestDatabasePoll;
boolean latestDatabasePollIsFresh = latestOnDemandPoll.nanosElapsedFromInitiation() < periodicPollDelayNanos;
if (latestDatabasePollIsFresh) {
Futures.getUnchecked(latestOnDemandPoll.pollCompletionFuture);
return true;
}
// Latest on-demand poll is not fresh. Fall through to return false from this method.
} else {
assert latestDatabasePoll == null;
      // No periodic database polls and no on-demand polls have been done yet; nothing to await.
}
return false;
}
/**
   * This method always forces a database poll if there is no ongoing database poll, and then
   * waits for the new or ongoing poll to complete before returning.
* This means that any method using this check can be sure that the latest poll for the snapshot was completed after
* this method was called.
*/
@VisibleForTesting
void forceOrWaitOngoingDatabasePoll()
{
long checkStartTime = System.currentTimeMillis();
ReentrantReadWriteLock.WriteLock lock = startStopPollLock.writeLock();
lock.lock();
try {
DatabasePoll latestDatabasePoll = this.latestDatabasePoll;
try {
        // Check whether a periodic poll completed while we were waiting for the lock
if (latestDatabasePoll instanceof PeriodicDatabasePoll
&& ((PeriodicDatabasePoll) latestDatabasePoll).lastPollStartTimestampInMs > checkStartTime) {
return;
}
        // Check whether an on-demand poll completed while we were waiting for the lock
if (latestDatabasePoll instanceof OnDemandDatabasePoll) {
long checkStartTimeNanos = TimeUnit.MILLISECONDS.toNanos(checkStartTime);
OnDemandDatabasePoll latestOnDemandPoll = (OnDemandDatabasePoll) latestDatabasePoll;
if (latestOnDemandPoll.initiationTimeNanos > checkStartTimeNanos) {
return;
}
}
}
catch (Exception e) {
// Latest poll was unsuccessful, try to do a new poll
log.debug(e, "Latest poll was unsuccessful. Starting a new poll...");
}
// Force a database poll
OnDemandDatabasePoll onDemandDatabasePoll = new OnDemandDatabasePoll();
this.latestDatabasePoll = onDemandDatabasePoll;
doOnDemandPoll(onDemandDatabasePoll);
}
finally {
lock.unlock();
}
}
private void doOnDemandPoll(OnDemandDatabasePoll onDemandPoll)
{
try {
poll();
onDemandPoll.pollCompletionFuture.complete(null);
}
catch (Throwable t) {
onDemandPoll.pollCompletionFuture.completeExceptionally(t);
throw t;
}
}
@Override
public boolean markSegmentAsUsed(final String segmentId)
{
try {
int numUpdatedDatabaseEntries = connector.getDBI().withHandle(
handle -> handle
.createStatement(StringUtils.format("UPDATE %s SET used=true, used_status_last_updated = :used_status_last_updated WHERE id = :id", getSegmentsTable()))
.bind("id", segmentId)
.bind("used_status_last_updated", DateTimes.nowUtc().toString())
.execute()
);
      // Unlike the bulk markAsUsed methods markAsUsedAllNonOvershadowedSegmentsInDataSource(),
      // markAsUsedNonOvershadowedSegmentsInInterval(), and markAsUsedNonOvershadowedSegments(), we don't put the
      // marked segment into the respective data source, because we don't have it fetched from the database. It's
      // probably not worth complicating the implementation and making two database queries just to add the segment,
      // because it will be fetched anyway during the next poll(). The segment-putting done in the bulk markAsUsed
      // methods is a nice-to-have, but doesn't formally affect the external guarantees of SegmentsMetadataManager.
return numUpdatedDatabaseEntries > 0;
}
catch (RuntimeException e) {
log.error(e, "Exception marking segment %s as used", segmentId);
throw e;
}
}
@Override
public int markAsUsedAllNonOvershadowedSegmentsInDataSource(final String dataSource)
{
return doMarkAsUsedNonOvershadowedSegments(dataSource, null, null);
}
@Override
public int markAsUsedNonOvershadowedSegmentsInInterval(
final String dataSource,
final Interval interval,
      @Nullable final List<String> versions
)
{
Preconditions.checkNotNull(interval);
return doMarkAsUsedNonOvershadowedSegments(dataSource, interval, versions);
}
private int doMarkAsUsedNonOvershadowedSegments(
final String dataSourceName,
final @Nullable Interval interval,
      final @Nullable List<String> versions
)
{
    final List<DataSegment> unusedSegments = new ArrayList<>();
final SegmentTimeline timeline = new SegmentTimeline();
connector.inReadOnlyTransaction(
(handle, status) -> {
final SqlSegmentsMetadataQuery queryTool =
SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables.get(), jsonMapper);
          final List<Interval> intervals =
interval == null ? Intervals.ONLY_ETERNITY : Collections.singletonList(interval);
          try (final CloseableIterator<DataSegment> iterator =
queryTool.retrieveUsedSegments(dataSourceName, intervals, versions)) {
timeline.addSegments(iterator);
}
          try (final CloseableIterator<DataSegment> iterator =
queryTool.retrieveUnusedSegments(dataSourceName, intervals, versions, null, null, null, null)) {
while (iterator.hasNext()) {
final DataSegment dataSegment = iterator.next();
timeline.addSegments(Iterators.singletonIterator(dataSegment));
unusedSegments.add(dataSegment);
}
}
//noinspection ReturnOfNull: This consumer operates by side effects
return null;
}
);
return markNonOvershadowedSegmentsAsUsed(unusedSegments, timeline);
}
private int markNonOvershadowedSegmentsAsUsed(
      List<DataSegment> unusedSegments,
SegmentTimeline timeline
)
{
    List<SegmentId> segmentIdsToMarkAsUsed = new ArrayList<>();
for (DataSegment segment : unusedSegments) {
if (!timeline.isOvershadowed(segment)) {
segmentIdsToMarkAsUsed.add(segment.getId());
}
}
return markSegmentsAsUsed(segmentIdsToMarkAsUsed);
}
@Override
  public int markAsUsedNonOvershadowedSegments(final String dataSource, final Set<String> segmentIds)
{
try {
      Pair<List<DataSegment>, SegmentTimeline> unusedSegmentsAndTimeline = connector
.inReadOnlyTransaction(
(handle, status) -> {
              List<DataSegment> unusedSegments = retrieveUnusedSegments(dataSource, segmentIds, handle);
              List<Interval> unusedSegmentsIntervals = JodaUtils.condenseIntervals(
unusedSegments.stream().map(DataSegment::getInterval).collect(Collectors.toList())
);
              try (CloseableIterator<DataSegment> usedSegmentsOverlappingUnusedSegmentsIntervals =
retrieveUsedSegmentsOverlappingIntervals(dataSource, unusedSegmentsIntervals, handle)) {
SegmentTimeline timeline = SegmentTimeline.forSegments(
Iterators.concat(usedSegmentsOverlappingUnusedSegmentsIntervals, unusedSegments.iterator())
);
return new Pair<>(unusedSegments, timeline);
}
}
);
      List<DataSegment> unusedSegments = unusedSegmentsAndTimeline.lhs;
SegmentTimeline timeline = unusedSegmentsAndTimeline.rhs;
return markNonOvershadowedSegmentsAsUsed(unusedSegments, timeline);
}
catch (Exception e) {
Throwable rootCause = Throwables.getRootCause(e);
if (rootCause instanceof DruidException) {
throw (DruidException) rootCause;
} else {
throw e;
}
}
}
  private List<DataSegment> retrieveUnusedSegments(
final String dataSource,
      final Set<String> segmentIds,
final Handle handle
)
{
    final List<DataSegmentPlus> retrievedSegments = SqlSegmentsMetadataQuery
.forHandle(handle, connector, dbTables.get(), jsonMapper)
.retrieveSegmentsById(dataSource, segmentIds);
    final Set<String> unknownSegmentIds = new HashSet<>(segmentIds);
    final List<DataSegment> unusedSegments = new ArrayList<>();
for (DataSegmentPlus entry : retrievedSegments) {
final DataSegment segment = entry.getDataSegment();
unknownSegmentIds.remove(segment.getId().toString());
if (Boolean.FALSE.equals(entry.getUsed())) {
unusedSegments.add(segment);
}
}
if (!unknownSegmentIds.isEmpty()) {
throw InvalidInput.exception(
"Could not find segment IDs[%s] for datasource[%s]",
unknownSegmentIds, dataSource
);
}
return unusedSegments;
}
  private CloseableIterator<DataSegment> retrieveUsedSegmentsOverlappingIntervals(
final String dataSource,
      final Collection<Interval> intervals,
final Handle handle
)
{
return SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables.get(), jsonMapper)
.retrieveUsedSegments(dataSource, intervals);
}
  private int markSegmentsAsUsed(final List<SegmentId> segmentIds)
{
if (segmentIds.isEmpty()) {
log.info("No segments found to mark as used.");
return 0;
}
return connector.getDBI().withHandle(
handle ->
SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables.get(), jsonMapper)
.markSegments(segmentIds, true)
);
}
@Override
public int markAsUnusedAllSegmentsInDataSource(final String dataSource)
{
try {
return connector.getDBI().withHandle(
handle ->
SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables.get(), jsonMapper)
.markSegmentsUnused(dataSource, Intervals.ETERNITY)
);
}
catch (RuntimeException e) {
log.error(e, "Exception marking all segments as unused in data source [%s]", dataSource);
throw e;
}
}
/**
* This method does not update {@link #dataSourcesSnapshot}, see the comments in {@link #doPoll()} about
* snapshot update. The update of the segment's state will be reflected after the next {@link DatabasePoll}.
*/
@Override
public boolean markSegmentAsUnused(final SegmentId segmentId)
{
try {
final int numSegments = connector.getDBI().withHandle(
handle ->
SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables.get(), jsonMapper)
.markSegments(Collections.singletonList(segmentId), false)
);
return numSegments > 0;
}
catch (RuntimeException e) {
log.error(e, "Exception marking segment [%s] as unused", segmentId);
throw e;
}
}
@Override
  public int markSegmentsAsUnused(Set<SegmentId> segmentIds)
{
return connector.getDBI().withHandle(
handle ->
SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables.get(), jsonMapper)
.markSegments(segmentIds, false)
);
}
@Override
public int markAsUnusedSegmentsInInterval(
final String dataSource,
final Interval interval,
      @Nullable final List<String> versions
)
{
Preconditions.checkNotNull(interval);
try {
return connector.getDBI().withHandle(
handle ->
SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables.get(), jsonMapper)
.markSegmentsUnused(dataSource, interval, versions)
);
}
catch (Exception e) {
throw new RuntimeException(e);
}
}
@Override
public @Nullable ImmutableDruidDataSource getImmutableDataSourceWithUsedSegments(String dataSourceName)
{
return getSnapshotOfDataSourcesWithAllUsedSegments().getDataSource(dataSourceName);
}
@Override
  public Collection<ImmutableDruidDataSource> getImmutableDataSourcesWithAllUsedSegments()
{
return getSnapshotOfDataSourcesWithAllUsedSegments().getDataSourcesWithAllUsedSegments();
}
@Override
public DataSourcesSnapshot getSnapshotOfDataSourcesWithAllUsedSegments()
{
useLatestIfWithinDelayOrPerformNewDatabasePoll();
return dataSourcesSnapshot;
}
@VisibleForTesting
DataSourcesSnapshot getDataSourcesSnapshot()
{
return dataSourcesSnapshot;
}
@VisibleForTesting
DatabasePoll getLatestDatabasePoll()
{
return latestDatabasePoll;
}
@Override
  public Iterable<DataSegment> iterateAllUsedSegments()
{
useLatestIfWithinDelayOrPerformNewDatabasePoll();
return dataSourcesSnapshot.iterateAllUsedSegmentsInSnapshot();
}
@Override
  public Optional<Iterable<DataSegment>> iterateAllUsedNonOvershadowedSegmentsForDatasourceInterval(
String datasource,
Interval interval,
boolean requiresLatest
)
{
if (requiresLatest) {
forceOrWaitOngoingDatabasePoll();
} else {
useLatestIfWithinDelayOrPerformNewDatabasePoll();
}
SegmentTimeline usedSegmentsTimeline
= dataSourcesSnapshot.getUsedSegmentsTimelinesPerDataSource().get(datasource);
return Optional.fromNullable(usedSegmentsTimeline)
.transform(timeline -> timeline.findNonOvershadowedObjectsInInterval(interval, Partitions.ONLY_COMPLETE));
}
/**
* Retrieves segments and their associated metadata for a given datasource that are marked unused and that are
* *fully contained by* an optionally specified interval. If the interval specified is null, this method will
* retrieve all unused segments.
*
* This call does not return any information about realtime segments.
*
* @param datasource The name of the datasource
* @param interval an optional interval to search over.
* @param limit an optional maximum number of results to return. If none is specified, the results are
* not limited.
   * @param lastSegmentId an optional last segment id from which to search for results. All segments returned are >
   *                      this segment lexicographically if sortOrder is null or {@link SortOrder#ASC}, or < this
   *                      segment lexicographically if sortOrder is {@link SortOrder#DESC}. If none is specified, no
   *                      such filter is used.
   * @param sortOrder     an optional order with which to return the matching segments by id, start time, end time. If
   *                      none is specified, the order of the results is not guaranteed.
   *
   * @return an iterable of unused segments together with their metadata.
*/
@Override
  public Iterable<DataSegmentPlus> iterateAllUnusedSegmentsForDatasource(
final String datasource,
@Nullable final Interval interval,
@Nullable final Integer limit,
@Nullable final String lastSegmentId,
@Nullable final SortOrder sortOrder
)
{
return connector.inReadOnlyTransaction(
(handle, status) -> {
final SqlSegmentsMetadataQuery queryTool =
SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables.get(), jsonMapper);
          final List<Interval> intervals =
interval == null
? Intervals.ONLY_ETERNITY
: Collections.singletonList(interval);
          try (final CloseableIterator<DataSegmentPlus> iterator =
queryTool.retrieveUnusedSegmentsPlus(datasource, intervals, null, limit, lastSegmentId, sortOrder, null)) {
return ImmutableList.copyOf(iterator);
}
}
);
}
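  // Hypothetical usage sketch (an addition, not from the original source); the manager variable, datasource name,
  // and interval are made up:
  //
  //   Iterable<DataSegmentPlus> unused = manager.iterateAllUnusedSegmentsForDatasource(
  //       "wikipedia",                            // datasource
  //       Intervals.of("2024-01-01/2024-02-01"),  // only segments fully inside this interval
  //       100,                                    // at most 100 results
  //       null,                                   // no lastSegmentId filter
  //       SortOrder.ASC                           // ascending by id, start, end
  //   );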
@Override
  public Set<String> retrieveAllDataSourceNames()
{
return connector.getDBI().withHandle(
handle -> handle
.createQuery(StringUtils.format("SELECT DISTINCT(datasource) FROM %s", getSegmentsTable()))
.fold(
new HashSet<>(),
                (Set<String> druidDataSources,
                 Map<String, Object> stringObjectMap,
FoldController foldController,
StatementContext statementContext) -> {
druidDataSources.add(MapUtils.getString(stringObjectMap, "datasource"));
return druidDataSources;
}
)
);
}
@VisibleForTesting
void poll()
{
// See the comment to the pollLock field, explaining this synchronized block
synchronized (pollLock) {
doPoll();
}
}
/** This method is extracted from {@link #poll()} solely to reduce code nesting. */
@GuardedBy("pollLock")
private void doPoll()
{
if (centralizedDatasourceSchemaConfig.isEnabled()) {
doPollSegmentAndSchema();
} else {
doPollSegments();
}
}
private void doPollSegments()
{
final Stopwatch stopwatch = Stopwatch.createStarted();
    // Some databases such as PostgreSQL require auto-commit turned off
    // to stream results back; enabling transactions disables auto-commit.
    //
    // Setting the connection to read-only will allow some databases such as MySQL
    // to automatically use read-only transaction mode, further optimizing the query.
    final List<DataSegment> segments = connector.inReadOnlyTransaction(
(handle, status) -> handle
.createQuery(StringUtils.format("SELECT payload FROM %s WHERE used=true", getSegmentsTable()))
.setFetchSize(connector.getStreamingFetchSize())
.map((index, r, ctx) -> {
try {
DataSegment segment = jsonMapper.readValue(r.getBytes("payload"), DataSegment.class);
return replaceWithExistingSegmentIfPresent(segment);
}
catch (IOException e) {
log.makeAlert(e, "Failed to read segment from db.").emit();
// If one entry in database is corrupted doPoll() should continue to work overall. See
// filter by `Objects::nonNull` below in this method.
return null;
}
}).list()
);
Preconditions.checkNotNull(
segments,
"Unexpected 'null' when polling segments from the db, aborting snapshot update."
);
stopwatch.stop();
emitMetric("segment/poll/time", stopwatch.millisElapsed());
log.info(
"Polled and found [%,d] segments in the database in [%,d]ms.",
segments.size(), stopwatch.millisElapsed()
);
createDatasourcesSnapshot(segments);
}
private void doPollSegmentAndSchema()
{
final Stopwatch stopwatch = Stopwatch.createStarted();
    ImmutableMap.Builder<SegmentId, SegmentMetadata> segmentMetadataBuilder = new ImmutableMap.Builder<>();
// We are emitting the stats here since this method is called periodically.
// Secondly, the stats are emitted before polling the schema,
    // as the {@link SegmentSchemaCache#resetInTransitSMQResultPublishedOnDBPoll} call after the schema poll clears some cached information.
segmentSchemaCache.emitStats();
// some databases such as PostgreSQL require auto-commit turned off
// to stream results back, enabling transactions disables auto-commit
//
    // setting connection to read-only will allow some databases such as MySQL
// to automatically use read-only transaction mode, further optimizing the query
    final List<DataSegment> segments = connector.inReadOnlyTransaction(
        new TransactionCallback<List<DataSegment>>()
{
@Override
          public List<DataSegment> inTransaction(Handle handle, TransactionStatus status)
{
return handle
.createQuery(StringUtils.format("SELECT payload, schema_fingerprint, num_rows FROM %s WHERE used=true", getSegmentsTable()))
.setFetchSize(connector.getStreamingFetchSize())
.map(
(index, r, ctx) -> {
try {
DataSegment segment = jsonMapper.readValue(r.getBytes("payload"), DataSegment.class);
Long numRows = (Long) r.getObject("num_rows");
String schemaFingerprint = r.getString("schema_fingerprint");
if (schemaFingerprint != null && numRows != null) {
segmentMetadataBuilder.put(
segment.getId(),
new SegmentMetadata(numRows, schemaFingerprint)
);
}
return replaceWithExistingSegmentIfPresent(segment);
}
catch (IOException e) {
log.makeAlert(e, "Failed to read segment from db.").emit();
// If one entry in database is corrupted doPoll() should continue to work overall. See
// filter by `Objects::nonNull` below in this method.
return null;
}
}
)
.list();
}
}
);
    ImmutableMap.Builder<String, SchemaPayload> schemaMapBuilder = new ImmutableMap.Builder<>();
final String schemaPollQuery =
StringUtils.format(
"SELECT fingerprint, payload FROM %s WHERE version = %s",
getSegmentSchemaTable(),
CentralizedDatasourceSchemaConfig.SCHEMA_VERSION
);
connector.inReadOnlyTransaction(
(handle, status) -> {
handle.createQuery(schemaPollQuery)
.setFetchSize(connector.getStreamingFetchSize())
.map((index, r, ctx) -> {
try {
schemaMapBuilder.put(
r.getString("fingerprint"),
jsonMapper.readValue(r.getBytes("payload"), SchemaPayload.class)
);
}
catch (IOException e) {
log.makeAlert(e, "Failed to read schema from db.").emit();
}
return null;
}).list();
return null;
});
    ImmutableMap<String, SchemaPayload> schemaMap = schemaMapBuilder.build();
segmentSchemaCache.updateFinalizedSegmentSchema(
new SegmentSchemaCache.FinalizedSegmentSchemaInfo(segmentMetadataBuilder.build(), schemaMap)
);
Preconditions.checkNotNull(
segments,
"Unexpected 'null' when polling segments from the db, aborting snapshot update."
);
stopwatch.stop();
emitMetric("segment/pollWithSchema/time", stopwatch.millisElapsed());
log.info(
"Polled and found [%,d] segments and [%,d] schemas in the database in [%,d]ms.",
segments.size(), schemaMap.size(), stopwatch.millisElapsed()
);
createDatasourcesSnapshot(segments);
}
private void emitMetric(String metricName, long value)
{
serviceEmitter.emit(new ServiceMetricEvent.Builder().setMetric(metricName, value));
}
  private void createDatasourcesSnapshot(List<DataSegment> segments)
{
final Stopwatch stopwatch = Stopwatch.createStarted();
    // dataSourcesSnapshot is updated only here, and the DataSourcesSnapshot object is immutable. If data sources or
    // segments are marked as used or unused directly (via the markAs...() methods in SegmentsMetadataManager), the
    // dataSourcesSnapshot can become stale until the next database poll.
    // DataSourcesSnapshot computes the overshadowed segments, which would make rebuilding it an expensive operation
    // if the snapshot were invalidated on each mark-as-used or mark-as-unused call, especially if a user issues a lot
    // of single-segment mark calls in rapid succession. So the snapshot is not updated outside of database polls at
    // this time. Updates outside of database polls were primarily for the user experience, so that users would
    // immediately see the effect of a segment mark call reflected in MetadataResource API calls.
    ImmutableMap<String, String> dataSourceProperties = createDefaultDataSourceProperties();
dataSourcesSnapshot = DataSourcesSnapshot.fromUsedSegments(
Iterables.filter(segments, Objects::nonNull), // Filter corrupted entries (see above in this method).
dataSourceProperties
);
emitMetric("segment/buildSnapshot/time", stopwatch.millisElapsed());
log.debug(
"Created snapshot from polled segments in [%d]ms. Found [%d] overshadowed segments.",
stopwatch.millisElapsed(), dataSourcesSnapshot.getOvershadowedSegments().size()
);
}
  private static ImmutableMap<String, String> createDefaultDataSourceProperties()
{
return ImmutableMap.of("created", DateTimes.nowUtc().toString());
}
/**
   * For the garbage collector in Java, it's better to keep new objects short-living, but once they are old enough
   * (i.e. promoted to the old generation), try to keep them alive. In {@link #poll()}, we fetch and deserialize all
   * existing segments each time, and then replace them in {@link #dataSourcesSnapshot}. This method allows reusing
   * already existing (old) segments when possible, effectively interning them a la {@link String#intern} or {@link
   * com.google.common.collect.Interner}, aiming to make the majority of {@link DataSegment} objects garbage soon after
   * they are deserialized and to die in the young generation. This avoids fragmentation of the old generation and
   * full GCs.
*/
private DataSegment replaceWithExistingSegmentIfPresent(DataSegment segment)
{
@MonotonicNonNull DataSourcesSnapshot dataSourcesSnapshot = this.dataSourcesSnapshot;
if (dataSourcesSnapshot == null) {
return segment;
}
@Nullable ImmutableDruidDataSource dataSource = dataSourcesSnapshot.getDataSource(segment.getDataSource());
if (dataSource == null) {
return segment;
}
DataSegment alreadyExistingSegment = dataSource.getSegment(segment.getId());
return alreadyExistingSegment != null ? alreadyExistingSegment : segment;
}
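  // The javadoc above likens this to interning. A sketch of the same idea with an explicit Guava interner (an
  // illustrative addition; the snapshot-based lookup above is what this class actually uses, since the snapshot
  // already holds the old instances):
  //
  //   private final Interner<DataSegment> segmentInterner = Interners.newWeakInterner();
  //
  //   private DataSegment replaceWithExistingSegmentIfPresent(DataSegment segment)
  //   {
  //     return segmentInterner.intern(segment);  // returns the previously seen equal instance, if any
  //   }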
private String getSegmentsTable()
{
return dbTables.get().getSegmentsTable();
}
private String getSegmentSchemaTable()
{
return dbTables.get().getSegmentSchemasTable();
}
@Override
  public List<Interval> getUnusedSegmentIntervals(
final String dataSource,
@Nullable final DateTime minStartTime,
final DateTime maxEndTime,
final int limit,
final DateTime maxUsedStatusLastUpdatedTime
)
{
    // Note that we handle the case where used_status_last_updated IS NULL here, to allow a smooth transition to a
    // Druid version that uses the used_status_last_updated column.
return connector.inReadOnlyTransaction(
        new TransactionCallback<List<Interval>>()
{
@Override
          public List<Interval> inTransaction(Handle handle, TransactionStatus status)
{
            final Query<Interval> sql = handle
.createQuery(
StringUtils.format(
"SELECT start, %2$send%2$s FROM %1$s WHERE dataSource = :dataSource AND "
+ "%2$send%2$s <= :end AND used = false AND used_status_last_updated IS NOT NULL AND used_status_last_updated <= :used_status_last_updated %3$s ORDER BY start, %2$send%2$s",
getSegmentsTable(),
connector.getQuoteString(),
null != minStartTime ? "AND start >= :start" : ""
)
)
.setFetchSize(connector.getStreamingFetchSize())
.setMaxRows(limit)
.bind("dataSource", dataSource)
.bind("end", maxEndTime.toString())
.bind("used_status_last_updated", maxUsedStatusLastUpdatedTime.toString())
.map(
                    new BaseResultSetMapper<Interval>()
{
@Override
                      protected Interval mapInternal(int index, Map<String, Object> row)
{
return new Interval(
DateTimes.of((String) row.get("start")),
DateTimes.of((String) row.get("end"))
);
}
}
);
if (null != minStartTime) {
sql.bind("start", minStartTime.toString());
}
            Iterator<Interval> iter = sql.iterator();
            List<Interval> result = Lists.newArrayListWithCapacity(limit);
for (int i = 0; i < limit && iter.hasNext(); i++) {
try {
result.add(iter.next());
}
catch (Exception e) {
throw new RuntimeException(e);
}
}
return result;
}
}
);
}
}