// All downloads are free. Search and download functionality uses the official Maven repository.
//
// org.apache.druid.timeline.DataSegment (Maven / Gradle / Ivy)
//
// There is a newer version: 31.0.0
// (Show newest version)
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.timeline;

import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.annotation.JsonSerialize;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Interner;
import com.google.common.collect.Interners;
import com.google.inject.Inject;
import it.unimi.dsi.fastutil.objects.Object2ObjectArrayMap;
import org.apache.druid.guice.annotations.PublicApi;
import org.apache.druid.jackson.CommaListJoinDeserializer;
import org.apache.druid.jackson.CommaListJoinSerializer;
import org.apache.druid.query.SegmentDescriptor;
import org.apache.druid.timeline.partition.NumberedShardSpec;
import org.apache.druid.timeline.partition.ShardSpec;
import org.joda.time.Interval;

import javax.annotation.Nullable;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * Metadata of Druid's data segment. An immutable object.
 *
 * DataSegment's equality ({@link #equals}/{@link #hashCode}) and {@link #compareTo} methods consider only the
 * {@link SegmentId} of the segment.
 */
@PublicApi
public class DataSegment implements Comparable, Overshadowable
{

  public static final String TOMBSTONE_LOADSPEC_TYPE = "tombstone";

  /*
   * The difference between this class and org.apache.druid.segment.Segment is that this class contains the segment
   * metadata only, while org.apache.druid.segment.Segment represents the actual body of segment data, queryable.
   */

  /**
   * This class is needed for optional injection of pruneLoadSpec and pruneLastCompactionState, see
   * github.com/google/guice/wiki/FrequentlyAskedQuestions#how-can-i-inject-optional-parameters-into-a-constructor
   */
  @VisibleForTesting
  public static class PruneSpecsHolder
  {
    @VisibleForTesting
    public static final PruneSpecsHolder DEFAULT = new PruneSpecsHolder();

    @Inject(optional = true)
    @PruneLoadSpec
    boolean pruneLoadSpec = false;

    @Inject(optional = true)
    @PruneLastCompactionState
    boolean pruneLastCompactionState = false;
  }

  private static final Interner STRING_INTERNER = Interners.newWeakInterner();
  private static final Interner> DIMENSIONS_INTERNER = Interners.newWeakInterner();
  private static final Interner> METRICS_INTERNER = Interners.newWeakInterner();
  private static final Interner COMPACTION_STATE_INTERNER = Interners.newWeakInterner();
  private static final Map PRUNED_LOAD_SPEC = ImmutableMap.of(
      "load spec is pruned, because it's not needed on Brokers, but eats a lot of heap space",
      ""
  );

  private final Integer binaryVersion;
  private final SegmentId id;
  @Nullable
  private final Map loadSpec;
  private final List dimensions;
  private final List metrics;
  private final ShardSpec shardSpec;

  /**
   * Stores some configurations of the compaction task which created this segment.
   * This field is filled in the metadata store only when "storeCompactionState" is set true in the context of the
   * task. True by default see {@link org.apache.druid.indexing.common.task.Tasks#DEFAULT_STORE_COMPACTION_STATE}.
   * Also, this field can be pruned in many Druid modules when this class is loaded from the metadata store.
   * See {@link PruneLastCompactionState} for details.
   */
  @Nullable
  private final CompactionState lastCompactionState;
  private final long size;

  @VisibleForTesting
  public DataSegment(
      SegmentId segmentId,
      Map loadSpec,
      List dimensions,
      List metrics,
      ShardSpec shardSpec,
      CompactionState lastCompactionState,
      Integer binaryVersion,
      long size
  )
  {
    this(
        segmentId.getDataSource(),
        segmentId.getInterval(),
        segmentId.getVersion(),
        loadSpec,
        dimensions,
        metrics,
        shardSpec,
        lastCompactionState,
        binaryVersion,
        size
    );
  }

  public DataSegment(
      String dataSource,
      Interval interval,
      String version,
      Map loadSpec,
      List dimensions,
      List metrics,
      ShardSpec shardSpec,
      Integer binaryVersion,
      long size
  )
  {
    this(
        dataSource,
        interval,
        version,
        loadSpec,
        dimensions,
        metrics,
        shardSpec,
        null,
        binaryVersion,
        size
    );
  }

  public DataSegment(
      String dataSource,
      Interval interval,
      String version,
      Map loadSpec,
      List dimensions,
      List metrics,
      ShardSpec shardSpec,
      @Nullable CompactionState lastCompactionState,
      Integer binaryVersion,
      long size
  )
  {
    this(
        dataSource,
        interval,
        version,
        loadSpec,
        dimensions,
        metrics,
        shardSpec,
        lastCompactionState,
        binaryVersion,
        size,
        PruneSpecsHolder.DEFAULT
    );
  }

  @JsonCreator
  public DataSegment(
      @JsonProperty("dataSource") String dataSource,
      @JsonProperty("interval") Interval interval,
      @JsonProperty("version") String version,
      // use `Map` *NOT* `LoadSpec` because we want to do lazy materialization to prevent dependency pollution
      @JsonProperty("loadSpec") @Nullable Map loadSpec,
      @JsonProperty("dimensions")
      @JsonDeserialize(using = CommaListJoinDeserializer.class)
      @Nullable
          List dimensions,
      @JsonProperty("metrics")
      @JsonDeserialize(using = CommaListJoinDeserializer.class)
      @Nullable
          List metrics,
      @JsonProperty("shardSpec") @Nullable ShardSpec shardSpec,
      @JsonProperty("lastCompactionState") @Nullable CompactionState lastCompactionState,
      @JsonProperty("binaryVersion") Integer binaryVersion,
      @JsonProperty("size") long size,
      @JacksonInject PruneSpecsHolder pruneSpecsHolder
  )
  {
    this.id = SegmentId.of(dataSource, interval, version, shardSpec);
    // prune loadspec if needed
    this.loadSpec = pruneSpecsHolder.pruneLoadSpec ? PRUNED_LOAD_SPEC : prepareLoadSpec(loadSpec);
    // Deduplicating dimensions and metrics lists as a whole because they are very likely the same for the same
    // dataSource
    this.dimensions = prepareDimensionsOrMetrics(dimensions, DIMENSIONS_INTERNER);
    this.metrics = prepareDimensionsOrMetrics(metrics, METRICS_INTERNER);
    this.shardSpec = (shardSpec == null) ? new NumberedShardSpec(0, 1) : shardSpec;
    this.lastCompactionState = pruneSpecsHolder.pruneLastCompactionState
                               ? null
                               : prepareCompactionState(lastCompactionState);
    this.binaryVersion = binaryVersion;
    Preconditions.checkArgument(size >= 0);
    this.size = size;
  }

  @Nullable
  private Map prepareLoadSpec(@Nullable Map loadSpec)
  {
    if (loadSpec == null) {
      return null;
    }
    // Load spec is just of 3 entries on average; HashMap/LinkedHashMap consumes much more memory than ArrayMap
    Map result = new Object2ObjectArrayMap<>(loadSpec.size());
    for (Map.Entry e : loadSpec.entrySet()) {
      result.put(STRING_INTERNER.intern(e.getKey()), e.getValue());
    }
    return result;
  }

  @Nullable
  private CompactionState prepareCompactionState(@Nullable CompactionState lastCompactionState)
  {
    if (lastCompactionState == null) {
      return null;
    }
    return COMPACTION_STATE_INTERNER.intern(lastCompactionState);
  }

  private List prepareDimensionsOrMetrics(@Nullable List list, Interner> interner)
  {
    if (list == null) {
      return ImmutableList.of();
    } else {
      List result = list
          .stream()
          .filter(s -> !Strings.isNullOrEmpty(s))
          // dimensions & metrics are stored as canonical string values to decrease memory required for storing
          // large numbers of segments.
          .map(STRING_INTERNER::intern)
          // TODO replace with ImmutableList.toImmutableList() when updated to Guava 21+
          .collect(Collectors.collectingAndThen(Collectors.toList(), ImmutableList::copyOf));
      return interner.intern(result);
    }
  }

  /**
   * Get dataSource
   *
   * @return the dataSource
   */
  @JsonProperty
  public String getDataSource()
  {
    return id.getDataSource();
  }

  @JsonProperty
  public Interval getInterval()
  {
    return id.getInterval();
  }

  @Nullable
  @JsonProperty
  public Map getLoadSpec()
  {
    return loadSpec;
  }

  @JsonProperty("version")
  @Override
  public String getVersion()
  {
    return id.getVersion();
  }

  @JsonProperty
  @JsonSerialize(using = CommaListJoinSerializer.class)
  public List getDimensions()
  {
    return dimensions;
  }

  @JsonProperty
  @JsonSerialize(using = CommaListJoinSerializer.class)
  public List getMetrics()
  {
    return metrics;
  }

  @JsonProperty
  public ShardSpec getShardSpec()
  {
    return shardSpec;
  }

  @Nullable
  @JsonProperty
  @JsonInclude(JsonInclude.Include.NON_NULL)
  public CompactionState getLastCompactionState()
  {
    return lastCompactionState;
  }

  @JsonProperty
  public Integer getBinaryVersion()
  {
    return binaryVersion;
  }

  @JsonProperty
  public long getSize()
  {
    return size;
  }

  // "identifier" for backward compatibility of JSON API
  @JsonProperty(value = "identifier", access = JsonProperty.Access.READ_ONLY)
  public SegmentId getId()
  {
    return id;
  }

  public boolean isTombstone()
  {
    return getShardSpec().getType().equals(ShardSpec.Type.TOMBSTONE);
  }

  @Override
  public boolean overshadows(DataSegment other)
  {
    if (id.getDataSource().equals(other.id.getDataSource()) && id.getInterval().overlaps(other.id.getInterval())) {
      final int majorVersionCompare = id.getVersion().compareTo(other.id.getVersion());
      if (majorVersionCompare > 0) {
        return true;
      } else if (majorVersionCompare == 0) {
        return includeRootPartitions(other) && getMinorVersion() > other.getMinorVersion();
      }
    }
    return false;
  }

  @Override
  public int getStartRootPartitionId()
  {
    return shardSpec.getStartRootPartitionId();
  }

  @Override
  public int getEndRootPartitionId()
  {
    return shardSpec.getEndRootPartitionId();
  }

  @Override
  public short getMinorVersion()
  {
    return shardSpec.getMinorVersion();
  }

  @Override
  public short getAtomicUpdateGroupSize()
  {
    return shardSpec.getAtomicUpdateGroupSize();
  }

  private boolean includeRootPartitions(DataSegment other)
  {
    return shardSpec.getStartRootPartitionId() <= other.shardSpec.getStartRootPartitionId()
           && shardSpec.getEndRootPartitionId() >= other.shardSpec.getEndRootPartitionId();
  }

  public SegmentDescriptor toDescriptor()
  {
    return id.toDescriptor();
  }

  public DataSegment withLoadSpec(Map loadSpec)
  {
    return builder(this).loadSpec(loadSpec).build();
  }

  public DataSegment withDimensions(List dimensions)
  {
    return builder(this).dimensions(dimensions).build();
  }

  public DataSegment withMetrics(List metrics)
  {
    return builder(this).metrics(metrics).build();
  }

  public DataSegment withShardSpec(ShardSpec newSpec)
  {
    return builder(this).shardSpec(newSpec).build();
  }

  public DataSegment withSize(long size)
  {
    return builder(this).size(size).build();
  }

  public DataSegment withVersion(String version)
  {
    return builder(this).version(version).build();
  }

  public DataSegment withBinaryVersion(int binaryVersion)
  {
    return builder(this).binaryVersion(binaryVersion).build();
  }

  public DataSegment withLastCompactionState(CompactionState compactionState)
  {
    return builder(this).lastCompactionState(compactionState).build();
  }

  @Override
  public int compareTo(DataSegment dataSegment)
  {
    return getId().compareTo(dataSegment.getId());
  }

  @Override
  public boolean equals(Object o)
  {
    if (o instanceof DataSegment) {
      return getId().equals(((DataSegment) o).getId());
    }
    return false;
  }

  @Override
  public int hashCode()
  {
    return getId().hashCode();
  }

  @Override
  public String toString()
  {
    return "DataSegment{" +
           "binaryVersion=" + binaryVersion +
           ", id=" + id +
           ", loadSpec=" + loadSpec +
           ", dimensions=" + dimensions +
           ", metrics=" + metrics +
           ", shardSpec=" + shardSpec +
           ", lastCompactionState=" + lastCompactionState +
           ", size=" + size +
           '}';
  }

  public static Builder builder()
  {
    return new Builder();
  }

  public static Builder builder(DataSegment segment)
  {
    return new Builder(segment);
  }

  public static class Builder
  {
    private String dataSource;
    private Interval interval;
    private String version;
    private Map loadSpec;
    private List dimensions;
    private List metrics;
    private ShardSpec shardSpec;
    private CompactionState lastCompactionState;
    private Integer binaryVersion;
    private long size;

    public Builder()
    {
      this.loadSpec = ImmutableMap.of();
      this.dimensions = ImmutableList.of();
      this.metrics = ImmutableList.of();
      this.shardSpec = new NumberedShardSpec(0, 1);
      this.size = -1;
    }

    public Builder(DataSegment segment)
    {
      this.dataSource = segment.getDataSource();
      this.interval = segment.getInterval();
      this.version = segment.getVersion();
      this.loadSpec = segment.getLoadSpec();
      this.dimensions = segment.getDimensions();
      this.metrics = segment.getMetrics();
      this.shardSpec = segment.getShardSpec();
      this.lastCompactionState = segment.getLastCompactionState();
      this.binaryVersion = segment.getBinaryVersion();
      this.size = segment.getSize();
    }

    public Builder dataSource(String dataSource)
    {
      this.dataSource = dataSource;
      return this;
    }

    public Builder interval(Interval interval)
    {
      this.interval = interval;
      return this;
    }

    public Builder version(String version)
    {
      this.version = version;
      return this;
    }

    public Builder loadSpec(Map loadSpec)
    {
      this.loadSpec = loadSpec;
      return this;
    }

    public Builder dimensions(List dimensions)
    {
      this.dimensions = dimensions;
      return this;
    }

    public Builder metrics(List metrics)
    {
      this.metrics = metrics;
      return this;
    }

    public Builder shardSpec(ShardSpec shardSpec)
    {
      this.shardSpec = shardSpec;
      return this;
    }

    public Builder lastCompactionState(CompactionState compactionState)
    {
      this.lastCompactionState = compactionState;
      return this;
    }

    public Builder binaryVersion(Integer binaryVersion)
    {
      this.binaryVersion = binaryVersion;
      return this;
    }

    public Builder size(long size)
    {
      this.size = size;
      return this;
    }

    public DataSegment build()
    {
      // Check stuff that goes into the id, at least.
      Preconditions.checkNotNull(dataSource, "dataSource");
      Preconditions.checkNotNull(interval, "interval");
      Preconditions.checkNotNull(version, "version");
      Preconditions.checkNotNull(shardSpec, "shardSpec");

      return new DataSegment(
          dataSource,
          interval,
          version,
          loadSpec,
          dimensions,
          metrics,
          shardSpec,
          lastCompactionState,
          binaryVersion,
          size
      );
    }
  }

  @Override
  public boolean hasData()
  {
    return !isTombstone();
  }

}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy