/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.metadata;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.collect.UnmodifiableIterator;
import org.apache.druid.java.util.common.CloseableIterators;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.jackson.JacksonUtils;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.java.util.common.parsers.CloseableIterator;
import org.apache.druid.server.http.DataSegmentPlus;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.SegmentId;
import org.joda.time.DateTime;
import org.joda.time.Interval;
import org.skife.jdbi.v2.Handle;
import org.skife.jdbi.v2.PreparedBatch;
import org.skife.jdbi.v2.Query;
import org.skife.jdbi.v2.ResultIterator;
import org.skife.jdbi.v2.SQLStatement;
import org.skife.jdbi.v2.Update;
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
/**
* An object that helps {@link SqlSegmentsMetadataManager} and {@link IndexerSQLMetadataStorageCoordinator} make
* queries to the metadata store segments table. Each instance of this class is scoped to a single handle and is meant
* to be short-lived.
*/
public class SqlSegmentsMetadataQuery
{
// Shared logger; instances of this class are short-lived, so the logger is static.
private static final Logger log = new Logger(SqlSegmentsMetadataQuery.class);
/**
 * Maximum number of intervals to consider for a batch.
 * This is similar to {@link IndexerSQLMetadataStorageCoordinator#MAX_NUM_SEGMENTS_TO_ANNOUNCE_AT_ONCE}, but imposed
 * on the intervals size.
 */
private static final int MAX_INTERVALS_PER_BATCH = 100;
// JDBI handle this query object is scoped to (see class javadoc: one handle, short-lived).
private final Handle handle;
// Metadata-store connector; presumably supplies dialect-specific SQL helpers — not exercised in the visible portion.
private final SQLMetadataConnector connector;
// Names of the metadata-store tables (segments table etc.).
private final MetadataStorageTablesConfig dbTables;
// Mapper for (de)serializing segment payloads stored as JSON in the metadata store.
private final ObjectMapper jsonMapper;
/**
 * Private constructor: instances are obtained via {@link #forHandle}, keeping each
 * query object scoped to exactly one JDBI handle.
 */
private SqlSegmentsMetadataQuery(
    final Handle handle,
    final SQLMetadataConnector connector,
    final MetadataStorageTablesConfig dbTables,
    final ObjectMapper jsonMapper
)
{
  // Plain field captures; order is irrelevant since nothing else runs here.
  this.jsonMapper = jsonMapper;
  this.dbTables = dbTables;
  this.connector = connector;
  this.handle = handle;
}
/**
 * Creates a query object bound to the given handle. The returned instance is scoped to that
 * single handle and is meant to be short-lived, though it may be used for more than one query.
 *
 * @param handle     JDBI handle the queries will run on
 * @param connector  metadata-store connector
 * @param dbTables   metadata-store table names
 * @param jsonMapper mapper for segment payload JSON
 */
public static SqlSegmentsMetadataQuery forHandle(
    final Handle handle,
    final SQLMetadataConnector connector,
    final MetadataStorageTablesConfig dbTables,
    final ObjectMapper jsonMapper
)
{
  final SqlSegmentsMetadataQuery query =
      new SqlSegmentsMetadataQuery(handle, connector, dbTables, jsonMapper);
  return query;
}
/**
 * Retrieves segments for a given datasource that are marked used (i.e. published) in the metadata store, and that
 * *overlap* any interval in a particular collection of intervals. If the collection of intervals is empty, this
 * method will retrieve all used segments.
 *
 * You cannot assume that segments returned by this call are actually active. Because there is some delay between
 * new segment publishing and the marking-unused of older segments, it is possible that some segments returned
 * by this call are overshadowed by other segments. To check for this, use
 * {@link org.apache.druid.timeline.SegmentTimeline#forSegments(Iterable)}.
 *
 * This call does not return any information about realtime segments.
 *
 * @param dataSource the name of the datasource
 * @param intervals  intervals to search over; empty means "all used segments"
 * @return a closeable iterator. You should close it when you are done.
 */
public CloseableIterator<DataSegment> retrieveUsedSegments(
    final String dataSource,
    final Collection<Interval> intervals
)
{
  // Delegate to the version-aware overload; a null version list means "all versions".
  return retrieveUsedSegments(dataSource, intervals, null);
}
/**
 * Similar to {@link #retrieveUsedSegments(String, Collection)}, but with an additional {@code versions} argument.
 * When {@code versions} is specified, all used segments in the specified {@code intervals} and {@code versions}
 * are retrieved.
 *
 * @param dataSource the name of the datasource
 * @param intervals  intervals to search over; empty means "all used segments"
 * @param versions   segment versions to match; null means all versions
 * @return a closeable iterator. You should close it when you are done.
 */
public CloseableIterator<DataSegment> retrieveUsedSegments(
    final String dataSource,
    final Collection<Interval> intervals,
    @Nullable final List<String> versions
)
{
  return retrieveSegments(
      dataSource,
      intervals,
      versions,
      IntervalMode.OVERLAPS, // used segments are matched on overlap, not containment
      true,                  // used = true
      null,                  // no limit
      null,                  // no lastSegmentId
      null,                  // no sortOrder
      null                   // no maxUsedStatusLastUpdatedTime
  );
}
/**
 * Retrieves segments for a given datasource that are marked unused and that are fully contained by any interval
 * in a particular collection of intervals. If the collection of intervals is empty, this method will retrieve all
 * unused segments.
 *
 * This call does not return any information about realtime segments.
 *
 * @param dataSource The name of the datasource
 * @param intervals The intervals to search over
 * @param versions An optional list of unused segment versions to retrieve in the given {@code intervals}.
 *                 If unspecified, all versions of unused segments in the {@code intervals} must be retrieved. If an
 *                 empty list is passed, no segments are retrieved.
 * @param limit The limit of segments to return
 * @param lastSegmentId the last segment id from which to search for results. All segments returned are &gt;
 *                      this segment lexicographically if sortOrder is null or ASC, or &lt; this segment
 *                      lexicographically if sortOrder is DESC.
 * @param sortOrder Specifies the order with which to return the matching segments by start time, end time.
 *                  A null value indicates that order does not matter.
 * @param maxUsedStatusLastUpdatedTime The maximum {@code used_status_last_updated} time. Any unused segment in
 *                                     {@code intervals} with {@code used_status_last_updated} no later than this
 *                                     time will be included in the iterator. Segments without
 *                                     {@code used_status_last_updated} time (due to an upgrade from legacy Druid)
 *                                     will have {@code maxUsedStatusLastUpdatedTime} ignored
 * @return a closeable iterator. You should close it when you are done.
 */
public CloseableIterator<DataSegment> retrieveUnusedSegments(
    final String dataSource,
    final Collection<Interval> intervals,
    @Nullable final List<String> versions,
    @Nullable final Integer limit,
    @Nullable final String lastSegmentId,
    @Nullable final SortOrder sortOrder,
    @Nullable final DateTime maxUsedStatusLastUpdatedTime
)
{
  return retrieveSegments(
      dataSource,
      intervals,
      versions,
      IntervalMode.CONTAINS, // unused segments are matched on full containment, unlike used ones
      false,                 // used = false
      limit,
      lastSegmentId,
      sortOrder,
      maxUsedStatusLastUpdatedTime
  );
}
/**
 * Similar to {@link #retrieveUnusedSegments}, but also retrieves associated metadata for the segments for a given
 * datasource that are marked unused and that are fully contained by any interval in a particular collection of
 * intervals. If the collection of intervals is empty, this method will retrieve all unused segments.
 *
 * This call does not return any information about realtime segments.
 *
 * @param dataSource The name of the datasource
 * @param intervals The intervals to search over
 * @param versions An optional list of unused segment versions to retrieve in the given {@code intervals}.
 *                 If unspecified, all versions are retrieved; an empty list retrieves no segments.
 * @param limit The limit of segments to return
 * @param lastSegmentId the last segment id from which to search for results. All segments returned are &gt;
 *                      this segment lexicographically if sortOrder is null or ASC, or &lt; this segment
 *                      lexicographically if sortOrder is DESC.
 * @param sortOrder Specifies the order with which to return the matching segments by start time, end time.
 *                  A null value indicates that order does not matter.
 * @param maxUsedStatusLastUpdatedTime The maximum {@code used_status_last_updated} time. Any unused segment in
 *                                     {@code intervals} with {@code used_status_last_updated} no later than this
 *                                     time will be included in the iterator. Segments without
 *                                     {@code used_status_last_updated} time (due to an upgrade from legacy Druid)
 *                                     will have {@code maxUsedStatusLastUpdatedTime} ignored
 * @return a closeable iterator. You should close it when you are done.
 */
public CloseableIterator<DataSegmentPlus> retrieveUnusedSegmentsPlus(
    final String dataSource,
    final Collection<Interval> intervals,
    @Nullable final List<String> versions,
    @Nullable final Integer limit,
    @Nullable final String lastSegmentId,
    @Nullable final SortOrder sortOrder,
    @Nullable final DateTime maxUsedStatusLastUpdatedTime
)
{
  return retrieveSegmentsPlus(
      dataSource,
      intervals,
      versions,
      IntervalMode.CONTAINS, // unused segments are matched on full containment
      false,                 // used = false
      limit,
      lastSegmentId,
      sortOrder,
      maxUsedStatusLastUpdatedTime
  );
}
public List retrieveSegmentsById(
String datasource,
Set segmentIds
)
{
final List> partitionedSegmentIds
= Lists.partition(new ArrayList<>(segmentIds), 100);
final List fetchedSegments = new ArrayList<>(segmentIds.size());
for (List partition : partitionedSegmentIds) {
fetchedSegments.addAll(retrieveSegmentBatchById(datasource, partition, false));
}
return fetchedSegments;
}
public List retrieveSegmentsWithSchemaById(
String datasource,
Set segmentIds
)
{
final List> partitionedSegmentIds
= Lists.partition(new ArrayList<>(segmentIds), 100);
final List fetchedSegments = new ArrayList<>(segmentIds.size());
for (List partition : partitionedSegmentIds) {
fetchedSegments.addAll(retrieveSegmentBatchById(datasource, partition, true));
}
return fetchedSegments;
}
private List retrieveSegmentBatchById(
String datasource,
List segmentIds,
boolean includeSchemaInfo
)
{
if (segmentIds.isEmpty()) {
return Collections.emptyList();
}
ResultIterator resultIterator;
if (includeSchemaInfo) {
final Query