All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.druid.server.http.MetadataResource Maven / Gradle / Ivy

There is a newer version: 32.0.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.server.http;

import com.google.common.base.Function;
import com.google.common.base.Throwables;
import com.google.common.collect.Collections2;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.inject.Inject;
import com.sun.jersey.spi.container.ResourceFilters;
import org.apache.druid.client.DataSourcesSnapshot;
import org.apache.druid.client.ImmutableDruidDataSource;
import org.apache.druid.error.DruidException;
import org.apache.druid.error.InvalidInput;
import org.apache.druid.indexing.overlord.IndexerMetadataStorageCoordinator;
import org.apache.druid.indexing.overlord.Segments;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.metadata.SegmentsMetadataManager;
import org.apache.druid.metadata.SortOrder;
import org.apache.druid.segment.metadata.AvailableSegmentMetadata;
import org.apache.druid.segment.metadata.CoordinatorSegmentMetadataCache;
import org.apache.druid.segment.metadata.DataSourceInformation;
import org.apache.druid.server.JettyUtils;
import org.apache.druid.server.coordinator.DruidCoordinator;
import org.apache.druid.server.http.security.DatasourceResourceFilter;
import org.apache.druid.server.security.AuthorizationUtils;
import org.apache.druid.server.security.AuthorizerMapper;
import org.apache.druid.server.security.ResourceAction;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.SegmentId;
import org.apache.druid.timeline.SegmentStatusInCluster;
import org.joda.time.Interval;

import javax.annotation.Nullable;
import javax.servlet.http.HttpServletRequest;
import javax.ws.rs.GET;
import javax.ws.rs.POST;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;
import javax.ws.rs.Produces;
import javax.ws.rs.QueryParam;
import javax.ws.rs.core.Context;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.UriInfo;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;
import java.util.stream.Stream;

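/**
 * HTTP resource rooted at {@code /druid/coordinator/v1/metadata} that serves datasource and segment metadata
 * (used segments, unused segments, datasource schema information and bootstrap segments) as JSON.
 */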
@Path("/druid/coordinator/v1/metadata")
public class MetadataResource
{
  private static final Logger log = new Logger(MetadataResource.class);
  // Source of the datasource/used-segment snapshots and of the unused-segment iteration used by the endpoints below.
  private final SegmentsMetadataManager segmentsMetadataManager;
  // Queried directly for interval-based used-segment lookups and for the by-id fallback in getSegment().
  private final IndexerMetadataStorageCoordinator metadataStorageCoordinator;
  // Passed to AuthorizationUtils.filterAuthorizedResources() to drop entries the caller may not read.
  private final AuthorizerMapper authorizerMapper;
  // Supplies per-segment replication factors and the set of broadcast (bootstrap) segments.
  private final DruidCoordinator coordinator;
  // May be null; when null, getDataSourceInformation() answers 404 and row counts / cache-only segments are omitted.
  private final @Nullable CoordinatorSegmentMetadataCache coordinatorSegmentMetadataCache;

  /**
   * Wires the resource with its injected collaborators.
   *
   * @param segmentsMetadataManager         provider of datasource and segment snapshots
   * @param metadataStorageCoordinator      direct metadata-storage access used for interval and by-id lookups
   * @param authorizerMapper                authorizers used to filter responses per caller
   * @param coordinator                     supplies replication factors and broadcast segments
   * @param coordinatorSegmentMetadataCache optional segment metadata cache; may be null
   */
  @Inject
  public MetadataResource(
      SegmentsMetadataManager segmentsMetadataManager,
      IndexerMetadataStorageCoordinator metadataStorageCoordinator,
      AuthorizerMapper authorizerMapper,
      DruidCoordinator coordinator,
      @Nullable CoordinatorSegmentMetadataCache coordinatorSegmentMetadataCache
  )
  {
    // Metadata access.
    this.metadataStorageCoordinator = metadataStorageCoordinator;
    this.segmentsMetadataManager = segmentsMetadataManager;

    // Coordinator-side state; the cache is allowed to be null.
    this.coordinatorSegmentMetadataCache = coordinatorSegmentMetadataCache;
    this.coordinator = coordinator;

    // Security.
    this.authorizerMapper = authorizerMapper;
  }

  /**
   * Lists the datasources the caller is authorized to read.
   *
   * <p>When the {@code includeUnused} / {@code includeDisabled} query parameter lookup yields a value, the names come
   * from {@code retrieveAllDataSourceNames()}; otherwise only datasources with used segments are considered. If
   * {@code full} is present and unused datasources were not requested, the matching
   * {@link ImmutableDruidDataSource} objects are returned instead of just their names.
   *
   * @param full    presence flag; when non-null, return full datasource objects (ignored together with includeUnused)
   * @param uriInfo used to look up the {@code includeUnused} / {@code includeDisabled} query parameter
   * @param req     request used for authorization filtering
   * @return 200 with either a sorted set of datasource names or a collection of datasources
   */
  @GET
  @Path("/datasources")
  @Produces(MediaType.APPLICATION_JSON)
  public Response getDataSources(
      @QueryParam("full") final String full,
      @Context final UriInfo uriInfo,
      @Context final HttpServletRequest req
  )
  {
    final boolean includeUnused = JettyUtils.getQueryParam(uriInfo, "includeUnused", "includeDisabled") != null;
    // Only populated on the "used segments" path; it is never read when includeUnused is true.
    Collection<ImmutableDruidDataSource> druidDataSources = null;
    final TreeSet<String> dataSourceNamesPreAuth;
    if (includeUnused) {
      dataSourceNamesPreAuth = new TreeSet<>(segmentsMetadataManager.retrieveAllDataSourceNames());
    } else {
      druidDataSources = segmentsMetadataManager.getImmutableDataSourcesWithAllUsedSegments();
      dataSourceNamesPreAuth = druidDataSources
          .stream()
          .map(ImmutableDruidDataSource::getName)
          .collect(Collectors.toCollection(TreeSet::new));
    }

    final TreeSet<String> dataSourceNamesPostAuth = new TreeSet<>();
    final Function<String, Iterable<ResourceAction>> raGenerator = datasourceName ->
        Collections.singletonList(AuthorizationUtils.DATASOURCE_READ_RA_GENERATOR.apply(datasourceName));

    Iterables.addAll(
        dataSourceNamesPostAuth,
        AuthorizationUtils.filterAuthorizedResources(
            req,
            dataSourceNamesPreAuth,
            raGenerator,
            authorizerMapper
        )
    );

    // Cannot do both includeUnused and full, let includeUnused take priority
    // Always use dataSourceNamesPostAuth to determine the set of returned dataSources
    if (full != null && !includeUnused) {
      return Response.ok().entity(
          Collections2.filter(druidDataSources, dataSource -> dataSourceNamesPostAuth.contains(dataSource.getName()))
      ).build();
    } else {
      return Response.ok().entity(dataSourceNamesPostAuth).build();
    }
  }

  @GET
  @Path("/segments")
  @Produces(MediaType.APPLICATION_JSON)
  public Response getAllUsedSegments(
      @Context final HttpServletRequest req,
      @QueryParam("datasources") final @Nullable Set dataSources,
      @QueryParam("includeOvershadowedStatus") final @Nullable String includeOvershadowedStatus,
      @QueryParam("includeRealtimeSegments") final @Nullable String includeRealtimeSegments
  )
  {
    try {
      // realtime segments can be requested only when includeOverShadowedStatus is set
      if (includeOvershadowedStatus == null && includeRealtimeSegments != null) {
        return Response.status(Response.Status.BAD_REQUEST).build();
      }

      if (includeOvershadowedStatus != null) {
        // note that realtime segments are returned only when druid.centralizedDatasourceSchema.enabled is set on the Coordinator
        // when the feature is disabled we do not want to increase the payload size polled by the Brokers, since they already have this information
        return getAllUsedSegmentsWithAdditionalDetails(req, dataSources, includeRealtimeSegments);
      }

      Collection dataSourcesWithUsedSegments =
          segmentsMetadataManager.getImmutableDataSourcesWithAllUsedSegments();
      if (dataSources != null && !dataSources.isEmpty()) {
        dataSourcesWithUsedSegments = dataSourcesWithUsedSegments
            .stream()
            .filter(dataSourceWithUsedSegments -> dataSources.contains(dataSourceWithUsedSegments.getName()))
            .collect(Collectors.toList());
      }
      final Stream usedSegments = dataSourcesWithUsedSegments
          .stream()
          .flatMap(t -> t.getSegments().stream());

      final Function> raGenerator = segment -> Collections.singletonList(
          AuthorizationUtils.DATASOURCE_READ_RA_GENERATOR.apply(segment.getDataSource()));

      final Iterable authorizedSegments =
          AuthorizationUtils.filterAuthorizedResources(req, usedSegments::iterator, raGenerator, authorizerMapper);

      Response.ResponseBuilder builder = Response.status(Response.Status.OK);
      return builder.entity(authorizedSegments).build();
    }
    catch (DruidException e) {
      return ServletResourceUtils.buildErrorResponseFrom(e);
    }
    catch (Exception e) {
      log.error(e, "Error while fetching used segment information.");
      return Response.serverError().entity(ImmutableMap.of("error", e.toString())).build();
    }
  }

  private Response getAllUsedSegmentsWithAdditionalDetails(
      HttpServletRequest req,
      @Nullable Set dataSources,
      String includeRealtimeSegments
  )
  {
    DataSourcesSnapshot dataSourcesSnapshot = segmentsMetadataManager.getSnapshotOfDataSourcesWithAllUsedSegments();
    Collection dataSourcesWithUsedSegments =
        dataSourcesSnapshot.getDataSourcesWithAllUsedSegments();
    if (dataSources != null && !dataSources.isEmpty()) {
      dataSourcesWithUsedSegments = dataSourcesWithUsedSegments
          .stream()
          .filter(dataSourceWithUsedSegments -> dataSources.contains(dataSourceWithUsedSegments.getName()))
          .collect(Collectors.toList());
    }
    final Set overshadowedSegments = dataSourcesSnapshot.getOvershadowedSegments();
    final Set segmentAlreadySeen = new HashSet<>();
    final Stream segmentStatus = dataSourcesWithUsedSegments
        .stream()
        .flatMap(t -> t.getSegments().stream())
        .map(segment -> {
          // The replication factor for unloaded segments is 0 as they will be unloaded soon
          boolean isOvershadowed = overshadowedSegments.contains(segment);
          Integer replicationFactor = isOvershadowed ? (Integer) 0
                                                     : coordinator.getReplicationFactor(segment.getId());

          Long numRows = null;
          if (coordinatorSegmentMetadataCache != null) {
            AvailableSegmentMetadata availableSegmentMetadata = coordinatorSegmentMetadataCache.getAvailableSegmentMetadata(
                segment.getDataSource(),
                segment.getId()
            );
            if (null != availableSegmentMetadata) {
              numRows = availableSegmentMetadata.getNumRows();
            }
          }
          segmentAlreadySeen.add(segment.getId());
          return new SegmentStatusInCluster(
              segment,
              isOvershadowed,
              replicationFactor,
              numRows,
              // published segment can't be realtime
              false
          );
        });

    Stream finalSegments = segmentStatus;

    // conditionally add realtime segments information
    if (includeRealtimeSegments != null && coordinatorSegmentMetadataCache != null) {
      final Stream realtimeSegmentStatus = coordinatorSegmentMetadataCache
          .getSegmentMetadataSnapshot()
          .values()
          .stream()
          .filter(availableSegmentMetadata ->
                      !segmentAlreadySeen.contains(availableSegmentMetadata.getSegment().getId()))
          .map(availableSegmentMetadata ->
                   new SegmentStatusInCluster(
                       availableSegmentMetadata.getSegment(),
                       false,
                       // replication factor is null for unpublished segments
                       null,
                       availableSegmentMetadata.getNumRows(),
                       availableSegmentMetadata.isRealtime() != 0
                   ));

      finalSegments = Stream.concat(segmentStatus, realtimeSegmentStatus);
    }

    final Function> raGenerator = segment -> Collections
        .singletonList(AuthorizationUtils.DATASOURCE_READ_RA_GENERATOR.apply(segment.getDataSegment().getDataSource()));

    final Iterable authorizedSegments = AuthorizationUtils.filterAuthorizedResources(
        req,
        finalSegments::iterator,
        raGenerator,
        authorizerMapper
    );

    Response.ResponseBuilder builder = Response.status(Response.Status.OK);
    return builder.entity(authorizedSegments).build();
  }

  /**
   * Returns the datasource with the given name together with its used segments, or 404 when the segments metadata
   * manager has no such datasource. Unlike {@link #getUsedSegmentsInDataSource}, which returns only a list of
   * segments, the response here also carries the properties of the data source, such as the time when it was created.
   *
   * @param dataSourceName name of the datasource to look up
   * @return 200 with the datasource, or 404 if it was not found
   */
  @GET
  @Path("/datasources/{dataSourceName}")
  @Produces(MediaType.APPLICATION_JSON)
  @ResourceFilters(DatasourceResourceFilter.class)
  public Response getDataSourceWithUsedSegments(@PathParam("dataSourceName") final String dataSourceName)
  {
    final ImmutableDruidDataSource found =
        segmentsMetadataManager.getImmutableDataSourceWithUsedSegments(dataSourceName);

    return found != null
           ? Response.status(Response.Status.OK).entity(found).build()
           : Response.status(Response.Status.NOT_FOUND).build();
  }

  /**
   * Returns the used segments of one datasource, either as full segment objects or as segment ids only.
   *
   * @param dataSourceName name of the datasource to look up
   * @param full           presence flag; when non-null the full segments are returned, otherwise only their ids
   * @return 200 with the segments or ids, or 404 if the datasource was not found
   */
  @GET
  @Path("/datasources/{dataSourceName}/segments")
  @Produces(MediaType.APPLICATION_JSON)
  @ResourceFilters(DatasourceResourceFilter.class)
  public Response getUsedSegmentsInDataSource(
      @PathParam("dataSourceName") String dataSourceName,
      @QueryParam("full") @Nullable String full
  )
  {
    final ImmutableDruidDataSource found =
        segmentsMetadataManager.getImmutableDataSourceWithUsedSegments(dataSourceName);
    if (found == null) {
      return Response.status(Response.Status.NOT_FOUND).build();
    }

    // Without "full" the segments are exposed through a view that maps each one to its id.
    final Object payload = full == null
                           ? Collections2.transform(found.getSegments(), DataSegment::getId)
                           : found.getSegments();
    return Response.status(Response.Status.OK).entity(payload).build();
  }

  /**
   * Returns the used segments (including overshadowed ones) of a datasource that fall into the given intervals.
   *
   * This is a {@link POST} method to pass the list of intervals in the body,
   * see https://github.com/apache/druid/pull/2109#issuecomment-182191258
   *
   * @param dataSourceName name of the datasource to query
   * @param full           presence flag; when non-null the full segments are returned, otherwise only their ids
   * @param intervals      intervals read from the request body
   * @return 200 with the matching segments or their ids
   */
  @POST
  @Path("/datasources/{dataSourceName}/segments")
  @Produces(MediaType.APPLICATION_JSON)
  @ResourceFilters(DatasourceResourceFilter.class)
  public Response getUsedSegmentsInDataSourceForIntervals(
      @PathParam("dataSourceName") String dataSourceName,
      @QueryParam("full") @Nullable String full,
      List<Interval> intervals
  )
  {
    Collection<DataSegment> segments = metadataStorageCoordinator
        .retrieveUsedSegmentsForIntervals(dataSourceName, intervals, Segments.INCLUDING_OVERSHADOWED);

    Response.ResponseBuilder builder = Response.status(Response.Status.OK);
    if (full != null) {
      return builder.entity(segments).build();
    }

    return builder.entity(Collections2.transform(segments, DataSegment::getId)).build();
  }

  /**
   * Returns the unused segments of a datasource, optionally restricted by interval, limit, a pagination cursor and
   * a sort order.
   *
   * @param req           the HTTP request (not used by this method)
   * @param dataSource    name of the datasource; must be non-empty
   * @param interval      optional interval; any {@code '_'} is replaced by {@code '/'} before parsing
   * @param limit         optional limit; negative values are rejected
   * @param lastSegmentId optional segment id; must be parseable for the given datasource
   * @param sortOrder     optional sort order, parsed with {@link SortOrder#fromValue}
   * @return 200 with the list of unused segments, an error response built from a {@link DruidException} for invalid
   *         input, or 500 for any other failure
   */
  @GET
  @Path("/datasources/{dataSourceName}/unusedSegments")
  @Produces(MediaType.APPLICATION_JSON)
  @ResourceFilters(DatasourceResourceFilter.class)
  public Response getUnusedSegmentsInDataSource(
      @Context final HttpServletRequest req,
      @PathParam("dataSourceName") final String dataSource,
      @QueryParam("interval") @Nullable String interval,
      @QueryParam("limit") @Nullable Integer limit,
      @QueryParam("lastSegmentId") @Nullable final String lastSegmentId,
      @QueryParam("sortOrder") @Nullable final String sortOrder
  )
  {
    try {
      if (dataSource == null || dataSource.isEmpty()) {
        throw InvalidInput.exception("dataSourceName must be non-empty.");
      }

      // Only negative limits are rejected; the message states the range that is actually enforced.
      if (limit != null && limit < 0) {
        throw InvalidInput.exception("Invalid limit[%s] specified. Limit must be >= 0.", limit);
      }

      if (lastSegmentId != null && SegmentId.tryParse(dataSource, lastSegmentId) == null) {
        throw InvalidInput.exception("Invalid lastSegmentId[%s] specified.", lastSegmentId);
      }

      final SortOrder theSortOrder = sortOrder == null ? null : SortOrder.fromValue(sortOrder);

      final Interval theInterval = interval != null ? Intervals.of(interval.replace('_', '/')) : null;
      // The element type is dictated by the segments metadata manager; it is only passed through to the response.
      final Iterable<?> unusedSegments = segmentsMetadataManager.iterateAllUnusedSegmentsForDatasource(
          dataSource,
          theInterval,
          limit,
          lastSegmentId,
          theSortOrder
      );

      // Materialize the iterable so the full result is available as the response entity.
      final List<Object> retVal = new ArrayList<>();
      unusedSegments.iterator().forEachRemaining(retVal::add);
      return Response.status(Response.Status.OK).entity(retVal).build();
    }
    catch (DruidException e) {
      return ServletResourceUtils.buildErrorResponseFrom(e);
    }
    catch (Exception e) {
      // Log server-side as well, consistent with getAllUsedSegments(); the client only gets the root cause text.
      log.error(e, "Error while fetching unused segments for datasource[%s].", dataSource);
      return Response
          .serverError()
          .entity(ImmutableMap.of("error", "Exception occurred.", "message", Throwables.getRootCause(e).toString()))
          .build();
    }
  }

  /**
   * Looks up a single segment of a datasource by its id.
   *
   * <p>The used segments held by the segments metadata manager are checked first, trying every possible parsing of
   * the id for the given datasource. If that yields nothing, including when the datasource currently has no used
   * segments at all, the metadata storage is queried by id.
   *
   * @param dataSourceName name of the datasource the segment must belong to
   * @param segmentId      serialized segment id
   * @param includeUnused  when {@code true}, the metadata storage lookup may also return an unused segment
   * @return 200 with the segment, or 404 if no matching segment of this datasource was found
   */
  @GET
  @Path("/datasources/{dataSourceName}/segments/{segmentId}")
  @Produces(MediaType.APPLICATION_JSON)
  @ResourceFilters(DatasourceResourceFilter.class)
  public Response getSegment(
      @PathParam("dataSourceName") String dataSourceName,
      @PathParam("segmentId") String segmentId,
      @QueryParam("includeUnused") @Nullable Boolean includeUnused
  )
  {
    final ImmutableDruidDataSource dataSource =
        segmentsMetadataManager.getImmutableDataSourceWithUsedSegments(dataSourceName);

    // A missing datasource here only means there is nothing to search in memory; do not return 404 yet,
    // otherwise the storage fallback below could never be reached for a datasource without used segments.
    if (dataSource != null) {
      for (SegmentId possibleSegmentId : SegmentId.iteratePossibleParsingsWithDataSource(dataSourceName, segmentId)) {
        final DataSegment usedSegment = dataSource.getSegment(possibleSegmentId);
        if (usedSegment != null) {
          return Response.status(Response.Status.OK).entity(usedSegment).build();
        }
      }
    }

    // fallback to db
    final DataSegment segment =
        metadataStorageCoordinator.retrieveSegmentForId(segmentId, Boolean.TRUE.equals(includeUnused));
    // The storage lookup is keyed by id only, so make sure the result really belongs to the datasource from the
    // path before returning it.
    if (segment != null && dataSourceName.equals(segment.getDataSource())) {
      return Response.status(Response.Status.OK).entity(segment).build();
    }
    return Response.status(Response.Status.NOT_FOUND).build();
  }

  /**
   * API to fetch {@link DataSourceInformation} for the specified datasources.
   *
   * @param dataSources list of dataSources to be queried
   * @return information including schema details for the specified datasources
   */
  @POST
  @Path("/dataSourceInformation")
  @Produces(MediaType.APPLICATION_JSON)
  public Response getDataSourceInformation(
      @Context final HttpServletRequest req,
      final List dataSources
  )
  {
    // if {@code coordinatorSegmentMetadataCache} is null, implies the feature is disabled. Return NOT_FOUND.
    if (coordinatorSegmentMetadataCache == null) {
      return Response.status(Response.Status.NOT_FOUND).build();
    }
    Map dataSourceSchemaMap = coordinatorSegmentMetadataCache.getDataSourceInformationMap();

    List results = new ArrayList<>();

    for (Map.Entry entry : dataSourceSchemaMap.entrySet()) {
      if (dataSources.contains(entry.getKey())) {
        results.add(entry.getValue());
      }
    }

    final Function> raGenerator = dataSourceInformation -> Collections
        .singletonList(AuthorizationUtils.DATASOURCE_READ_RA_GENERATOR.apply(dataSourceInformation.getDataSource()));

    final Iterable authorizedDataSourceInformation = AuthorizationUtils.filterAuthorizedResources(
        req,
        results,
        raGenerator,
        authorizerMapper
    );
    return Response.status(Response.Status.OK).entity(authorizedDataSourceInformation).build();
  }

  /**
   * Returns the broadcast segments currently reported by the coordinator.
   *
   * @return all bootstrap segments determined by the coordinator, or 503 with an explanatory message while the
   *         coordinator has not produced them yet (i.e. it reports {@code null})
   */
  @POST
  @Path("/bootstrapSegments")
  @Produces(MediaType.APPLICATION_JSON)
  @ResourceFilters(DatasourceResourceFilter.class)
  public Response getBootstrapSegments()
  {
    final Set<DataSegment> broadcastSegments = coordinator.getBroadcastSegments();
    // null means "not computed yet", which is different from an empty set of bootstrap segments.
    if (broadcastSegments == null) {
      return Response.status(Response.Status.SERVICE_UNAVAILABLE)
                     .entity("Bootstrap segments are not initialized yet."
                         + " Please ensure that the Coordinator duties are running and try again.")
                     .build();
    }
    return Response.status(Response.Status.OK).entity(broadcastSegments).build();
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy