
org.apache.druid.query.materializedview.DataSourceOptimizer Maven / Gradle / Ivy
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.materializedview;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSortedSet;
import com.google.inject.Inject;
import org.apache.druid.client.TimelineServerView;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.query.Query;
import org.apache.druid.query.TableDataSource;
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.query.timeseries.TimeseriesQuery;
import org.apache.druid.query.topn.TopNQuery;
import org.apache.druid.timeline.TimelineObjectHolder;
import org.joda.time.Interval;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.stream.Collectors;
public class DataSourceOptimizer
{
private final ReadWriteLock lock = new ReentrantReadWriteLock();
private final TimelineServerView serverView;
private ConcurrentHashMap derivativesHitCount = new ConcurrentHashMap<>();
private ConcurrentHashMap totalCount = new ConcurrentHashMap<>();
private ConcurrentHashMap hitCount = new ConcurrentHashMap<>();
private ConcurrentHashMap costTime = new ConcurrentHashMap<>();
private ConcurrentHashMap, AtomicLong>> missFields = new ConcurrentHashMap<>();
@Inject
public DataSourceOptimizer(TimelineServerView serverView)
{
this.serverView = serverView;
}
/**
* Do main work about materialized view selection: transform user query to one or more sub-queries.
*
* In the sub-query, the dataSource is the derivative of dataSource in user query, and sum of all sub-queries'
* intervals equals the interval in user query
*
* Derived dataSource with smallest average data size per segment granularity have highest priority to replace the
* datasource in user query
*
* @param query only TopNQuery/TimeseriesQuery/GroupByQuery can be optimized
* @return a list of queries with specified derived dataSources and intervals
*/
public List optimize(Query query)
{
long start = System.currentTimeMillis();
// only topN/timeseries/groupby query can be optimized
// only TableDataSource can be optimiezed
if (!(query instanceof TopNQuery || query instanceof TimeseriesQuery || query instanceof GroupByQuery)
|| !(query.getDataSource() instanceof TableDataSource)) {
return Collections.singletonList(query);
}
String datasourceName = ((TableDataSource) query.getDataSource()).getName();
// get all derivatives for datasource in query. The derivatives set is sorted by average size of
// per segment granularity.
Set derivatives = DerivativeDataSourceManager.getDerivatives(datasourceName);
if (derivatives.isEmpty()) {
return Collections.singletonList(query);
}
lock.readLock().lock();
try {
totalCount.computeIfAbsent(datasourceName, dsName -> new AtomicLong(0)).incrementAndGet();
hitCount.putIfAbsent(datasourceName, new AtomicLong(0));
AtomicLong costTimeOfDataSource = costTime.computeIfAbsent(datasourceName, dsName -> new AtomicLong(0));
// get all fields which the query required
Set requiredFields = MaterializedViewUtils.getRequiredFields(query);
Set derivativesWithRequiredFields = new HashSet<>();
for (DerivativeDataSource derivativeDataSource : derivatives) {
derivativesHitCount.putIfAbsent(derivativeDataSource.getName(), new AtomicLong(0));
if (derivativeDataSource.getColumns().containsAll(requiredFields)) {
derivativesWithRequiredFields.add(derivativeDataSource);
}
}
// if no derivatives contains all required dimensions, this materialized view selection failed.
if (derivativesWithRequiredFields.isEmpty()) {
missFields
.computeIfAbsent(datasourceName, dsName -> new ConcurrentHashMap<>())
.computeIfAbsent(requiredFields, rf -> new AtomicLong(0))
.incrementAndGet();
costTimeOfDataSource.addAndGet(System.currentTimeMillis() - start);
return Collections.singletonList(query);
}
List queries = new ArrayList<>();
List remainingQueryIntervals = query.getIntervals();
for (DerivativeDataSource derivativeDataSource : ImmutableSortedSet.copyOf(derivativesWithRequiredFields)) {
TableDataSource tableDataSource = new TableDataSource(derivativeDataSource.getName());
final List derivativeIntervals = remainingQueryIntervals.stream()
.flatMap(interval -> serverView
.getTimeline(tableDataSource)
.orElseThrow(() -> new ISE("No timeline for dataSource: %s", derivativeDataSource.getName()))
.lookup(interval)
.stream()
.map(TimelineObjectHolder::getInterval)
)
.collect(Collectors.toList());
// if the derivative does not contain any parts of intervals in the query, the derivative will
// not be selected.
if (derivativeIntervals.isEmpty()) {
continue;
}
remainingQueryIntervals = MaterializedViewUtils.minus(remainingQueryIntervals, derivativeIntervals);
queries.add(
query.withDataSource(new TableDataSource(derivativeDataSource.getName()))
.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(derivativeIntervals))
);
derivativesHitCount.get(derivativeDataSource.getName()).incrementAndGet();
if (remainingQueryIntervals.isEmpty()) {
break;
}
}
if (queries.isEmpty()) {
costTime.get(datasourceName).addAndGet(System.currentTimeMillis() - start);
return Collections.singletonList(query);
}
//after materialized view selection, the result of the remaining query interval will be computed based on
// the original datasource.
if (!remainingQueryIntervals.isEmpty()) {
queries.add(query.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(remainingQueryIntervals)));
}
hitCount.get(datasourceName).incrementAndGet();
costTime.get(datasourceName).addAndGet(System.currentTimeMillis() - start);
return queries;
}
finally {
lock.readLock().unlock();
}
}
public List getAndResetStats()
{
ImmutableMap derivativesHitCountSnapshot;
ImmutableMap totalCountSnapshot;
ImmutableMap hitCountSnapshot;
ImmutableMap costTimeSnapshot;
ImmutableMap, AtomicLong>> missFieldsSnapshot;
lock.writeLock().lock();
try {
derivativesHitCountSnapshot = ImmutableMap.copyOf(derivativesHitCount);
totalCountSnapshot = ImmutableMap.copyOf(totalCount);
hitCountSnapshot = ImmutableMap.copyOf(hitCount);
costTimeSnapshot = ImmutableMap.copyOf(costTime);
missFieldsSnapshot = ImmutableMap.copyOf(missFields);
derivativesHitCount.clear();
totalCount.clear();
hitCount.clear();
costTime.clear();
missFields.clear();
}
finally {
lock.writeLock().unlock();
}
List stats = new ArrayList<>();
Map> baseToDerivatives = DerivativeDataSourceManager.getAllDerivatives();
for (Map.Entry> entry : baseToDerivatives.entrySet()) {
Map derivativesStat = new HashMap<>();
for (DerivativeDataSource derivative : entry.getValue()) {
derivativesStat.put(
derivative.getName(),
derivativesHitCountSnapshot.getOrDefault(derivative.getName(), new AtomicLong(0)).get()
);
}
stats.add(
new DataSourceOptimizerStats(
entry.getKey(),
hitCountSnapshot.getOrDefault(entry.getKey(), new AtomicLong(0)).get(),
totalCountSnapshot.getOrDefault(entry.getKey(), new AtomicLong(0)).get(),
costTimeSnapshot.getOrDefault(entry.getKey(), new AtomicLong(0)).get(),
missFieldsSnapshot.getOrDefault(entry.getKey(), new ConcurrentHashMap<>()),
derivativesStat
)
);
}
return stats;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy