/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.indexing.common.task;

import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import com.google.common.collect.Lists;
import org.apache.druid.client.coordinator.CoordinatorClient;
import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.DoubleDimensionSchema;
import org.apache.druid.data.input.impl.FloatDimensionSchema;
import org.apache.druid.data.input.impl.InputRowParser;
import org.apache.druid.data.input.impl.LongDimensionSchema;
import org.apache.druid.data.input.impl.NoopInputRowParser;
import org.apache.druid.data.input.impl.StringDimensionSchema;
import org.apache.druid.data.input.impl.TimeAndDimsParseSpec;
import org.apache.druid.indexer.TaskStatus;
import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
import org.apache.druid.indexer.partitions.HashedPartitionsSpec;
import org.apache.druid.indexing.common.RetryPolicyFactory;
import org.apache.druid.indexing.common.SegmentLoaderFactory;
import org.apache.druid.indexing.common.TaskToolbox;
import org.apache.druid.indexing.common.actions.SegmentListUsedAction;
import org.apache.druid.indexing.common.actions.TaskActionClient;
import org.apache.druid.indexing.common.stats.RowIngestionMetersFactory;
import org.apache.druid.indexing.common.task.IndexTask.IndexIOConfig;
import org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec;
import org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig;
import org.apache.druid.indexing.firehose.IngestSegmentFirehoseFactory;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.JodaUtils;
import org.apache.druid.java.util.common.Numbers;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.RE;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.druid.java.util.common.granularity.GranularityType;
import org.apache.druid.java.util.common.guava.Comparators;
import org.apache.druid.java.util.common.jackson.JacksonUtils;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.segment.DimensionHandler;
import org.apache.druid.segment.IndexIO;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.indexing.DataSchema;
import org.apache.druid.segment.indexing.granularity.GranularitySpec;
import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec;
import org.apache.druid.segment.loading.SegmentLoadingException;
import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager;
import org.apache.druid.segment.realtime.firehose.ChatHandlerProvider;
import org.apache.druid.server.coordinator.DataSourceCompactionConfig;
import org.apache.druid.server.security.AuthorizerMapper;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.TimelineLookup;
import org.apache.druid.timeline.TimelineObjectHolder;
import org.apache.druid.timeline.VersionedIntervalTimeline;
import org.apache.druid.timeline.partition.PartitionChunk;
import org.apache.druid.timeline.partition.PartitionHolder;
import org.joda.time.Interval;
import javax.annotation.Nullable;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.StreamSupport;
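
/**
 * Task that compacts the segments of a datasource over a given interval (or an explicit segment list)
 * by generating and running {@link IndexTask}s.
 *
 * <p>For illustration, a submitted compaction task spec might look like the following (a hypothetical
 * example; available fields depend on the Druid version):
 * <pre>{@code
 * {
 *   "type": "compact",
 *   "dataSource": "wikipedia",
 *   "interval": "2019-01-01/2019-02-01",
 *   "tuningConfig": { ... }
 * }
 * }</pre>
 */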
public class CompactionTask extends AbstractBatchIndexTask
{
/**
* The CompactionTask creates and runs multiple IndexTask instances. When the {@link AppenderatorsManager}
* is asked to clean up, it does so on a per-task basis keyed by task ID. However, the subtask IDs of the
* CompactionTask are not externally visible. This context flag is used to ensure that all the appenderators
   * created for the CompactionTask's subtasks are tracked under the ID of the parent CompactionTask.
* The CompactionTask may change in the future and no longer require this behavior (e.g., reusing the same
* Appenderator across subtasks, or allowing the subtasks to use the same ID). The CompactionTask is also the only
* task type that currently creates multiple appenderators. Thus, a context flag is used to handle this case
* instead of a more general approach such as new methods on the Task interface.
*/
public static final String CTX_KEY_APPENDERATOR_TRACKING_TASK_ID = "appenderatorTrackingTaskId";
private static final Logger log = new Logger(CompactionTask.class);
private static final String TYPE = "compact";
private final Interval interval;
  private final List<DataSegment> segments;
@Nullable
private final DimensionsSpec dimensionsSpec;
@Nullable
private final AggregatorFactory[] metricsSpec;
@Nullable
private final Granularity segmentGranularity;
@Nullable
private final Long targetCompactionSizeBytes;
@Nullable
private final IndexTuningConfig tuningConfig;
private final ObjectMapper jsonMapper;
@JsonIgnore
private final SegmentProvider segmentProvider;
@JsonIgnore
private final PartitionConfigurationManager partitionConfigurationManager;
@JsonIgnore
private final AuthorizerMapper authorizerMapper;
@JsonIgnore
private final ChatHandlerProvider chatHandlerProvider;
@JsonIgnore
private final RowIngestionMetersFactory rowIngestionMetersFactory;
@JsonIgnore
private final CoordinatorClient coordinatorClient;
@JsonIgnore
private final SegmentLoaderFactory segmentLoaderFactory;
@JsonIgnore
private final RetryPolicyFactory retryPolicyFactory;
@JsonIgnore
private final AppenderatorsManager appenderatorsManager;
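  // Holds the currently-running subtask so it can be stopped gracefully if this task is shut down.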
@JsonIgnore
private final CurrentSubTaskHolder currentSubTaskHolder = new CurrentSubTaskHolder(
(taskObject, config) -> {
final IndexTask indexTask = (IndexTask) taskObject;
indexTask.stopGracefully(config);
}
);
@JsonIgnore
  private List<IndexTask> indexTaskSpecs;
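  /**
   * Note that this constructor accepts both {@code dimensions} and {@code dimensionsSpec} for backward
   * compatibility; {@code dimensionsSpec} takes precedence when both are provided. Exactly one of
   * {@code interval} and {@code segments} must be non-null.
   */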
@JsonCreator
public CompactionTask(
@JsonProperty("id") final String id,
@JsonProperty("resource") final TaskResource taskResource,
@JsonProperty("dataSource") final String dataSource,
@JsonProperty("interval") @Nullable final Interval interval,
@JsonProperty("segments") @Nullable final List segments,
@JsonProperty("dimensions") @Nullable final DimensionsSpec dimensions,
@JsonProperty("dimensionsSpec") @Nullable final DimensionsSpec dimensionsSpec,
@JsonProperty("metricsSpec") @Nullable final AggregatorFactory[] metricsSpec,
@JsonProperty("segmentGranularity") @Nullable final Granularity segmentGranularity,
@JsonProperty("targetCompactionSizeBytes") @Nullable final Long targetCompactionSizeBytes,
@JsonProperty("tuningConfig") @Nullable final IndexTuningConfig tuningConfig,
@JsonProperty("context") @Nullable final Map context,
@JacksonInject ObjectMapper jsonMapper,
@JacksonInject AuthorizerMapper authorizerMapper,
@JacksonInject ChatHandlerProvider chatHandlerProvider,
@JacksonInject RowIngestionMetersFactory rowIngestionMetersFactory,
@JacksonInject CoordinatorClient coordinatorClient,
@JacksonInject SegmentLoaderFactory segmentLoaderFactory,
@JacksonInject RetryPolicyFactory retryPolicyFactory,
@JacksonInject AppenderatorsManager appenderatorsManager
)
{
super(getOrMakeId(id, TYPE, dataSource), null, taskResource, dataSource, context);
Preconditions.checkArgument(interval != null || segments != null, "interval or segments should be specified");
Preconditions.checkArgument(interval == null || segments == null, "one of interval and segments should be null");
if (interval != null && interval.toDurationMillis() == 0) {
throw new IAE("Interval[%s] is empty, must specify a nonempty interval", interval);
}
this.interval = interval;
this.segments = segments;
this.dimensionsSpec = dimensionsSpec == null ? dimensions : dimensionsSpec;
this.metricsSpec = metricsSpec;
this.segmentGranularity = segmentGranularity;
this.targetCompactionSizeBytes = targetCompactionSizeBytes;
this.tuningConfig = tuningConfig;
this.jsonMapper = jsonMapper;
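    // Choose the segment provider based on which input was given: explicit segments, or an interval to scan.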
this.segmentProvider = segments == null ? new SegmentProvider(dataSource, interval) : new SegmentProvider(segments);
this.partitionConfigurationManager = new PartitionConfigurationManager(targetCompactionSizeBytes, tuningConfig);
this.authorizerMapper = authorizerMapper;
this.chatHandlerProvider = chatHandlerProvider;
this.rowIngestionMetersFactory = rowIngestionMetersFactory;
this.coordinatorClient = coordinatorClient;
this.segmentLoaderFactory = segmentLoaderFactory;
this.retryPolicyFactory = retryPolicyFactory;
this.appenderatorsManager = appenderatorsManager;
}
@JsonProperty
public Interval getInterval()
{
return interval;
}
@JsonProperty
  public List<DataSegment> getSegments()
{
return segments;
}
@JsonProperty
@Nullable
public DimensionsSpec getDimensionsSpec()
{
return dimensionsSpec;
}
@JsonProperty
@Nullable
public AggregatorFactory[] getMetricsSpec()
{
return metricsSpec;
}
@JsonProperty
@Nullable
@Override
public Granularity getSegmentGranularity()
{
return segmentGranularity;
}
@Nullable
@JsonProperty
public Long getTargetCompactionSizeBytes()
{
return targetCompactionSizeBytes;
}
@Nullable
@JsonProperty
public IndexTuningConfig getTuningConfig()
{
return tuningConfig;
}
@Override
public String getType()
{
return TYPE;
}
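  /**
   * Compaction tasks run at the default merge-task priority unless overridden via the task context.
   */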
@Override
public int getPriority()
{
return getContextValue(Tasks.PRIORITY_KEY, Tasks.DEFAULT_MERGE_TASK_PRIORITY);
}
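  /**
   * Verifies that the segments to compact exist and attempts to acquire locks covering them.
   */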
@Override
public boolean isReady(TaskActionClient taskActionClient) throws Exception
{
    final List<DataSegment> segments = segmentProvider.checkAndGetSegments(taskActionClient);
return determineLockGranularityandTryLockWithSegments(taskActionClient, segments);
}
@Override
public boolean requireLockExistingSegments()
{
return true;
}
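  /**
   * Returns all used segments of this task's datasource that overlap the given intervals; these are
   * the segments this task must lock.
   */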
@Override
  public List<DataSegment> findSegmentsToLock(TaskActionClient taskActionClient, List<Interval> intervals)
throws IOException
{
return taskActionClient.submit(new SegmentListUsedAction(getDataSource(), null, intervals));
}
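  /**
   * Perfect rollup is required only when the tuning config forces guaranteed rollup.
   */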
@Override
public boolean isPerfectRollup()
{
return tuningConfig != null && tuningConfig.isForceGuaranteedRollup();
}
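  /**
   * Lazily generates one {@link IndexTask} per ingestion spec produced by
   * {@link #createIngestionSchema}, then runs them sequentially. The compaction task succeeds only
   * if every subtask succeeds.
   */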
@Override
public TaskStatus runTask(TaskToolbox toolbox) throws Exception
{
if (indexTaskSpecs == null) {
      final List<IndexIngestionSpec> ingestionSpecs = createIngestionSchema(
toolbox,
segmentProvider,
partitionConfigurationManager,
dimensionsSpec,
metricsSpec,
segmentGranularity,
jsonMapper,
coordinatorClient,
segmentLoaderFactory,
retryPolicyFactory
);
indexTaskSpecs = IntStream
.range(0, ingestionSpecs.size())
.mapToObj(i -> new IndexTask(
createIndexTaskSpecId(i),
getGroupId(),
getTaskResource(),
getDataSource(),
ingestionSpecs.get(i),
createContextForSubtask(),
authorizerMapper,
chatHandlerProvider,
rowIngestionMetersFactory,
appenderatorsManager
))
.collect(Collectors.toList());
}
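    // An empty spec list means no segments were found to compact for the given interval.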
if (indexTaskSpecs.isEmpty()) {
log.warn("Interval[%s] has no segments, nothing to do.", interval);
return TaskStatus.failure(getId());
} else {
registerResourceCloserOnAbnormalExit(currentSubTaskHolder);
final int totalNumSpecs = indexTaskSpecs.size();
log.info("Generated [%d] compaction task specs", totalNumSpecs);
int failCnt = 0;
for (IndexTask eachSpec : indexTaskSpecs) {
final String json = jsonMapper.writerWithDefaultPrettyPrinter().writeValueAsString(eachSpec);
if (!currentSubTaskHolder.setTask(eachSpec)) {
log.info("Task is asked to stop. Finish as failed.");
return TaskStatus.failure(getId());
}
try {
if (eachSpec.isReady(toolbox.getTaskActionClient())) {
log.info("Running indexSpec: " + json);
final TaskStatus eachResult = eachSpec.run(toolbox);
if (!eachResult.isSuccess()) {
failCnt++;
log.warn("Failed to run indexSpec: [%s].\nTrying the next indexSpec.", json);
}
} else {
failCnt++;
log.warn("indexSpec is not ready: [%s].\nTrying the next indexSpec.", json);
}
}
catch (Exception e) {
failCnt++;
log.warn(e, "Failed to run indexSpec: [%s].\nTrying the next indexSpec.", json);
}
}
log.info("Run [%d] specs, [%d] succeeded, [%d] failed", totalNumSpecs, totalNumSpecs - failCnt, failCnt);
return failCnt == 0 ? TaskStatus.success(getId()) : TaskStatus.failure(getId());
}
}
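  /**
   * Copies this task's context and tags it with this task's ID so that appenderators created by the
   * subtasks are tracked under the parent task's ID. See {@link #CTX_KEY_APPENDERATOR_TRACKING_TASK_ID}.
   */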
  private Map<String, Object> createContextForSubtask()
{
    final Map<String, Object> newContext = new HashMap<>(getContext());
newContext.put(CTX_KEY_APPENDERATOR_TRACKING_TASK_ID, getId());
return newContext;
}
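  /**
   * Subtask IDs are this task's ID suffixed with the spec's sequence number.
   */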
private String createIndexTaskSpecId(int i)
{
return StringUtils.format("%s_%d", getId(), i);
}
/**
* Generate {@link IndexIngestionSpec} from input segments.
*
   * @return an empty list if no input segments exist; otherwise, the generated ingestion specs.
*/
@VisibleForTesting
  static List<IndexIngestionSpec> createIngestionSchema(
final TaskToolbox toolbox,
final SegmentProvider segmentProvider,
final PartitionConfigurationManager partitionConfigurationManager,
@Nullable final DimensionsSpec dimensionsSpec,
@Nullable final AggregatorFactory[] metricsSpec,
@Nullable final Granularity segmentGranularity,
final ObjectMapper jsonMapper,
final CoordinatorClient coordinatorClient,
final SegmentLoaderFactory segmentLoaderFactory,
final RetryPolicyFactory retryPolicyFactory
) throws IOException, SegmentLoadingException
{
    Pair<Map<DataSegment, File>, List<TimelineObjectHolder<String, DataSegment>>>