// zipkin2.elasticsearch.internal.IndexNameFormatter Maven / Gradle / Ivy
/*
* Copyright The OpenZipkin Authors
* SPDX-License-Identifier: Apache-2.0
*/
package zipkin2.elasticsearch.internal;
import com.google.auto.value.AutoValue;
import java.time.Instant;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;
import zipkin2.internal.DateUtil;
import zipkin2.internal.Nullable;
import static java.time.LocalDateTime.ofInstant;
import static java.util.Calendar.DAY_OF_MONTH;
/**
* Index-Prefix/type delimiter
* When Elasticsearch dropped support for multiple type indexes, we introduced a delimited naming
* convention to distinguish between span, dependency and autocomplete documents. Originally, this
* was a colon prefix pattern. In version 7, Elasticsearch dropped support for colons in indexes. To
* keep existing writes consistent, we still use colon in versions prior to ES 7, even though
* starting at version 7, we change to hyphens. {@code zipkin2.elasticsearch.IndexTemplates} is
* responsible for this decision.
*
* Creating indexes
* Using the default index prefix of "zipkin", when indexes are created, they look like the
* following, based on the version.
*
*
* - ES up to v6: zipkin:span-2019-05-03 zipkin:dependency-2019-05-03 zipkin:autocomplete-2019-05-03
* - ES v7: zipkin-span-2019-05-03 zipkin-dependency-2019-05-03 zipkin-autocomplete-2019-05-03
*
*
* We can allow an index prefix of up to 231 UTF-8 encoded bytes, subject to the index naming
* constraints. This is the normal 255 limit minus the longest suffix (ex. -autocomplete-2019-05-03).
*
*
* Reading indexes
* While ES 7 cannot write new indexes with colons, it can read them. Upon upgrade, some sites
* will have a mixed read state where some indexes delimit types with a colon and others a hyphen.
* Accordingly, we use * in read patterns in place of a type delimiter. We use * because there is no
* support for single character wildcards in ES.
*
* Elasticsearch 7 naming constraints
* According to a recent
* reference, the following index naming constraints apply to index names as of ES 7:
*
*
* - No more than 255 UTF-8 encoded bytes
* - Cannot be . or ..
* - Cannot contain : or #
* - Cannot start with _ - or +
*
*/
@AutoValue
public abstract class IndexNameFormatter {
  private static final TimeZone UTC = TimeZone.getTimeZone("UTC");

  /** Returns a new builder backed by the AutoValue-generated implementation. */
  public static Builder newBuilder() {
    return new AutoValue_IndexNameFormatter.Builder();
  }

  public abstract Builder toBuilder();

  /** The index prefix that every formatted index name or pattern starts with. */
  public abstract String index();

  /** Character placed between year, month and day. {@code 0} means "no separator". */
  abstract char dateSeparator();

  /** Formatter for the daily date suffix; derived from {@link #dateSeparator()} on build. */
  abstract DateTimeFormatter dateFormat();

  @AutoValue.Builder
  public abstract static class Builder {
    public abstract Builder index(String index);

    public abstract Builder dateSeparator(char dateSeparator);

    abstract Builder dateFormat(DateTimeFormatter dateFormat);

    abstract char dateSeparator();

    /**
     * Derives the UTC date format from the date separator, then builds the formatter. A separator
     * of {@code 0} yields {@code yyyyMMdd}; any other character replaces the hyphens in
     * {@code yyyy-MM-dd}.
     */
    public final IndexNameFormatter build() {
      char separator = dateSeparator();
      String format = separator == 0 ? "yyyyMMdd" : "yyyy-MM-dd".replace('-', separator);
      return dateFormat(DateTimeFormatter.ofPattern(format).withZone(ZoneOffset.UTC)).autoBuild();
    }

    abstract IndexNameFormatter autoBuild();
  }

  /**
   * Returns a list of index patterns that represent the range provided. Notably, this compresses
   * years, months or groups of ten days using wildcards (in order to send smaller API calls).
   *
   * <p>For example, if {@code beginMillis} is 2016-11-30 and {@code endMillis} is 2017-01-02, the
   * result will be 2016-11-30, 2016-12-*, 2017-01-01 and 2017-01-02.
   *
   * @param type document type to include in the pattern, or null to use only the index prefix
   * @param beginMillis epoch milliseconds of the first day to include (truncated to UTC midnight)
   * @param endMillis epoch milliseconds of the last day to include (truncated to UTC midnight)
   * @return patterns in chronological order; empty when the begin day is after the end day
   */
  public List<String> formatTypeAndRange(@Nullable String type, long beginMillis, long endMillis) {
    GregorianCalendar current = midnightUTC(beginMillis);
    GregorianCalendar end = midnightUTC(endMillis);
    String prefix = prefix(type);
    List<String> indices = new ArrayList<>();
    while (current.compareTo(end) <= 0) {
      int day = current.get(DAY_OF_MONTH);
      boolean compressed = false;
      if (day == 1 && current.get(Calendar.MONTH) == Calendar.JANUARY) {
        // Only a whole year is attempted on January 1st; otherwise it is emitted as a single day.
        compressed = compressYear(indices, prefix, current, end);
      } else if (day == 1) {
        // Prefer the whole month; otherwise fall back to days 1-9 (which share the "0" digit).
        int lastDayOfMonth = current.getActualMaximum(DAY_OF_MONTH);
        compressed = compressDays(indices, "%s-%s%c%02d%c*", prefix, current, end, lastDayOfMonth)
          || compressDays(indices, "%s-%s%c%02d%c0*", prefix, current, end, 9);
      } else if (day == 10) {
        compressed = compressDays(indices, "%s-%s%c%02d%c1*", prefix, current, end, 19);
      } else if (day == 20) {
        // February has no 29th outside leap years. Setting a non-existent day on a lenient
        // calendar rolls into March, which would emit a pattern for the wrong month.
        int lastDay = Math.min(29, current.getActualMaximum(DAY_OF_MONTH));
        compressed = compressDays(indices, "%s-%s%c%02d%c2*", prefix, current, end, lastDay);
      }
      if (!compressed) {
        indices.add(formatTypeAndTimestamp(type, current.getTimeInMillis()));
        current.add(DAY_OF_MONTH, 1);
      }
    }
    return indices;
  }

  /**
   * When the rest of the year starting at {@code current} (January 1st) is within range, adds a
   * year wildcard and advances {@code current} to the next January 1st. Otherwise leaves
   * {@code current} on January 1st and returns false.
   */
  private boolean compressYear(List<String> indices, String prefix, GregorianCalendar current,
    GregorianCalendar end) {
    current.set(Calendar.DAY_OF_YEAR, current.getActualMaximum(Calendar.DAY_OF_YEAR));
    if (current.compareTo(end) > 0) {
      current.set(Calendar.DAY_OF_YEAR, 1); // rollback to first of the year
      return false;
    }
    char separator = dateSeparator();
    // A separator of 0 means "none": it must not be rendered as a literal NUL character.
    String separatorText = separator == 0 ? "" : String.valueOf(separator);
    indices.add(prefix + "-" + current.get(Calendar.YEAR) + separatorText + "*");
    current.add(DAY_OF_MONTH, 1); // rollover to next year
    return true;
  }

  /**
   * When every day from {@code current} through {@code lastDay} of the same month is within range,
   * adds the pattern produced by {@code format} and advances {@code current} to the day after
   * {@code lastDay}. Otherwise restores {@code current} to its original day and returns false.
   *
   * <p>{@code lastDay} must exist in the current month.
   */
  private boolean compressDays(List<String> indices, String format, String prefix,
    GregorianCalendar current, GregorianCalendar end, int lastDay) {
    int firstDay = current.get(DAY_OF_MONTH);
    current.set(DAY_OF_MONTH, lastDay);
    if (current.compareTo(end) > 0) {
      current.set(DAY_OF_MONTH, firstDay); // rollback to where we started
      return false;
    }
    indices.add(formatIndexPattern(format, current, prefix));
    current.add(DAY_OF_MONTH, 1); // rollover to the day after the compressed range
    return true;
  }

  /**
   * Applies {@code format} to the arguments (prefix, year, separator, month, separator), in that
   * order. The month is one-based.
   *
   * <p>Formatting uses {@link Locale#ROOT} so that digits are always ASCII. When the date
   * separator is {@code 0}, the NUL characters that {@code %c} would emit are removed.
   */
  String formatIndexPattern(String format, GregorianCalendar current, String prefix) {
    char separator = dateSeparator();
    String pattern = String.format(
      Locale.ROOT,
      format,
      prefix,
      current.get(Calendar.YEAR),
      separator,
      current.get(Calendar.MONTH) + 1,
      separator);
    return separator == 0 ? pattern.replace("\0", "") : pattern;
  }

  /** Returns a UTC calendar set to the result of {@code DateUtil.midnightUTC(epochMillis)}. */
  static GregorianCalendar midnightUTC(long epochMillis) {
    GregorianCalendar result = new GregorianCalendar(UTC);
    result.setTimeInMillis(DateUtil.midnightUTC(epochMillis));
    return result;
  }

  /** On insert, require a version-specific index-type delimiter as ES 7+ dropped colons */
  public String formatTypeAndTimestampForInsert(String type, char indexTypeDelimiter,
    long timestampMillis) {
    return index() + indexTypeDelimiter + type + '-' + format(timestampMillis);
  }

  /**
   * Returns the read pattern for a single day: the prefix (with the type, when non-null), a
   * hyphen, then the formatted UTC date of {@code timestampMillis}.
   */
  public String formatTypeAndTimestamp(@Nullable String type, long timestampMillis) {
    return prefix(type) + "-" + format(timestampMillis);
  }

  private String prefix(@Nullable String type) {
    // We use a wildcard (*) in place of the index-type delimiter in order to read both : and -,
    // as starting in ES 7, : is no longer permitted.
    return type != null ? index() + "*" + type : index();
  }

  /** Returns a pattern matching every daily index of the given type (or of the bare prefix). */
  public String formatType(@Nullable String type) {
    return prefix(type) + "-*";
  }

  /** Formats the UTC date of {@code timestampMillis} using {@link #dateFormat()}. */
  String format(long timestampMillis) {
    return dateFormat().format(ofInstant(Instant.ofEpochMilli(timestampMillis), ZoneOffset.UTC));
  }
}