
org.apache.cassandra.utils.Overlaps Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.utils;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.PriorityQueue;
import java.util.Set;
import java.util.function.BiPredicate;
public class Overlaps
{
/**
* Construct a minimal list of overlap sets, i.e. the sections of the range span when we have overlapping items,
* where we ensure:
* - non-overlapping items are never put in the same set
* - no item is present in non-consecutive sets
* - for any point where items overlap, the result includes a set listing all overlapping items
*
* For example, for inputs A[0, 4), B[2, 8), C[6, 10), D[1, 9) the result would be the sets ABD and BCD. We are not
* interested in the spans where A, B, or C are present on their own or in combination with D, only that there
* exists a set in the list that is a superset of any such combination, and that the non-overlapping A and C are
* never together in a set.
*
* Note that the full list of overlap sets A, AD, ABD, BD, BCD, CD, C is also an answer that satisfies the three
* conditions above, but it contains redundant sets (e.g. AD is already contained in ABD).
*
* @param items A list of items to distribute in overlap sets. This is assumed to be a transient list and the method
* may modify or consume it. It is assumed that the start and end positions of an item are ordered,
* and the items are non-empty.
* @param startsAfter Predicate determining if its left argument's start if fully after the right argument's end.
* This will only be used with arguments where left's start is known to be after right's start.
* It is up to the caller if this is a strict comparison -- strict (>) for end-inclusive spans
* and non-strict (>=) for end-exclusive.
* @param startsComparator Comparator of items' starting positions.
* @param endsComparator Comparator of items' ending positions.
* @return List of overlap sets.
*/
public static List> constructOverlapSets(List items,
BiPredicate startsAfter,
Comparator startsComparator,
Comparator endsComparator)
{
List> overlaps = new ArrayList<>();
if (items.isEmpty())
return overlaps;
PriorityQueue active = new PriorityQueue<>(endsComparator);
items.sort(startsComparator);
for (E item : items)
{
if (!active.isEmpty() && startsAfter.test(item, active.peek()))
{
// New item starts after some active ends. It does not overlap with it, so:
// -- output the previous active set
overlaps.add(new HashSet<>(active));
// -- remove all items that also end before the current start
do
{
active.poll();
}
while (!active.isEmpty() && startsAfter.test(item, active.peek()));
}
// Add the new item to the active state. We don't care if it starts later than others in the active set,
// the important point is that it overlaps with all of them.
active.add(item);
}
assert !active.isEmpty();
overlaps.add(new HashSet<>(active));
return overlaps;
}
public enum InclusionMethod
{
NONE, SINGLE, TRANSITIVE;
}
public interface BucketMaker
{
B makeBucket(List> sets, int startIndexInclusive, int endIndexExclusive);
}
/**
* Assign overlap sections into buckets. Identify sections that have at least threshold-many overlapping
* items and apply the overlap inclusion method to combine with any neighbouring sections that contain
* selected sstables to make sure we make full use of any sstables selected for compaction (i.e. avoid
* recompacting, see {@link org.apache.cassandra.db.compaction.unified.Controller#overlapInclusionMethod()}).
*
* @param threshold Threshold for selecting a bucket. Sets below this size will be ignored, unless they need
* to be grouped with a neighboring set due to overlap.
* @param inclusionMethod NONE to only form buckets of the overlapping sets, SINGLE to include all
* sets that share an sstable with a selected bucket, or TRANSITIVE to include
* all sets that have an overlap chain to a selected bucket.
* @param overlaps An ordered list of overlap sets as returned by {@link #constructOverlapSets}.
* @param bucketer Method used to create a bucket out of the supplied set indexes.
*/
public static List assignOverlapsIntoBuckets(int threshold,
InclusionMethod inclusionMethod,
List> overlaps,
BucketMaker bucketer)
{
List buckets = new ArrayList<>();
int regionCount = overlaps.size();
int lastEnd = -1;
for (int i = 0; i < regionCount; ++i)
{
Set bucket = overlaps.get(i);
int maxOverlap = bucket.size();
if (maxOverlap < threshold)
continue;
int startIndex = i;
int endIndex = i + 1;
if (inclusionMethod != InclusionMethod.NONE)
{
Set allOverlapping = new HashSet<>(bucket);
Set overlapTarget = inclusionMethod == InclusionMethod.TRANSITIVE
? allOverlapping
: bucket;
int j;
for (j = i - 1; j > lastEnd; --j)
{
Set next = overlaps.get(j);
if (!setsIntersect(next, overlapTarget))
break;
allOverlapping.addAll(next);
}
startIndex = j + 1;
for (j = i + 1; j < regionCount; ++j)
{
Set next = overlaps.get(j);
if (!setsIntersect(next, overlapTarget))
break;
allOverlapping.addAll(next);
}
i = j - 1;
endIndex = j;
}
buckets.add(bucketer.makeBucket(overlaps, startIndex, endIndex));
lastEnd = i;
}
return buckets;
}
private static boolean setsIntersect(Set s1, Set s2)
{
// Note: optimized for small sets and O(1) lookup.
for (E s : s1)
if (s2.contains(s))
return true;
return false;
}
/**
* Pull the last elements from the given list, up to the given limit.
*/
public static List pullLast(List source, int limit)
{
List result = new ArrayList<>(limit);
while (--limit >= 0)
result.add(source.remove(source.size() - 1));
return result;
}
/**
* Select up to `limit` sstables from each overlapping set (more than `limit` in total) by taking the last entries
* from `allObjectsSorted`. To achieve this, keep selecting the last sstable until the next one we would add would
* bring the number selected in some overlap section over `limit`.
*/
public static Collection pullLastWithOverlapLimit(List allObjectsSorted, List> overlapSets, int limit)
{
int setsCount = overlapSets.size();
int[] selectedInBucket = new int[setsCount];
int allCount = allObjectsSorted.size();
for (int selectedCount = 0; selectedCount < allCount; ++selectedCount)
{
T candidate = allObjectsSorted.get(allCount - 1 - selectedCount);
for (int i = 0; i < setsCount; ++i)
{
if (overlapSets.get(i).contains(candidate))
{
++selectedInBucket[i];
if (selectedInBucket[i] > limit)
return pullLast(allObjectsSorted, selectedCount);
}
}
}
return allObjectsSorted;
}
}