All downloads are free. Search and download functionality uses the official Maven repository.

io.druid.indexing.firehose.IngestSegmentFirehoseFactory Maven / Gradle / Ivy

There is a newer version: 0.12.3
Show newest version
/*
 * Licensed to Metamarkets Group Inc. (Metamarkets) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Metamarkets licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package io.druid.indexing.firehose;

import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
import com.google.inject.Injector;
import com.metamx.emitter.EmittingLogger;
import io.druid.data.input.Firehose;
import io.druid.data.input.FirehoseFactory;
import io.druid.data.input.impl.InputRowParser;
import io.druid.indexing.common.TaskToolbox;
import io.druid.indexing.common.TaskToolboxFactory;
import io.druid.indexing.common.actions.SegmentListUsedAction;
import io.druid.indexing.common.task.NoopTask;
import io.druid.java.util.common.granularity.Granularities;
import io.druid.java.util.common.parsers.ParseException;
import io.druid.query.filter.DimFilter;
import io.druid.segment.IndexIO;
import io.druid.segment.QueryableIndexStorageAdapter;
import io.druid.segment.loading.SegmentLoadingException;
import io.druid.segment.realtime.firehose.IngestSegmentFirehose;
import io.druid.segment.realtime.firehose.WindowedStorageAdapter;
import io.druid.timeline.DataSegment;
import io.druid.timeline.TimelineObjectHolder;
import io.druid.timeline.VersionedIntervalTimeline;
import io.druid.timeline.partition.PartitionChunk;
import org.joda.time.Interval;

import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class IngestSegmentFirehoseFactory implements FirehoseFactory
{
  private static final EmittingLogger log = new EmittingLogger(IngestSegmentFirehoseFactory.class);
  private final String dataSource;
  private final Interval interval;
  private final DimFilter dimFilter;
  private final List dimensions;
  private final List metrics;
  private final Injector injector;
  private final IndexIO indexIO;

  @JsonCreator
  public IngestSegmentFirehoseFactory(
      @JsonProperty("dataSource") final String dataSource,
      @JsonProperty("interval") Interval interval,
      @JsonProperty("filter") DimFilter dimFilter,
      @JsonProperty("dimensions") List dimensions,
      @JsonProperty("metrics") List metrics,
      @JacksonInject Injector injector,
      @JacksonInject IndexIO indexIO
  )
  {
    Preconditions.checkNotNull(dataSource, "dataSource");
    Preconditions.checkNotNull(interval, "interval");
    this.dataSource = dataSource;
    this.interval = interval;
    this.dimFilter = dimFilter;
    this.dimensions = dimensions;
    this.metrics = metrics;
    this.injector = injector;
    this.indexIO = Preconditions.checkNotNull(indexIO, "null IndexIO");
  }

  @JsonProperty
  public String getDataSource()
  {
    return dataSource;
  }

  @JsonProperty
  public Interval getInterval()
  {
    return interval;
  }

  @JsonProperty("filter")
  public DimFilter getDimensionsFilter()
  {
    return dimFilter;
  }

  @JsonProperty
  public List getDimensions()
  {
    return dimensions;
  }

  @JsonProperty
  public List getMetrics()
  {
    return metrics;
  }

  @Override
  public Firehose connect(InputRowParser inputRowParser) throws IOException, ParseException
  {
    log.info("Connecting firehose: dataSource[%s], interval[%s]", dataSource, interval);
    // better way to achieve this is to pass toolbox to Firehose, The instance is initialized Lazily on connect method.
    // Noop Task is just used to create the toolbox and list segments.
    final TaskToolbox toolbox = injector.getInstance(TaskToolboxFactory.class).build(
        new NoopTask("reingest", 0, 0, null, null, null)
    );

    try {
      final List usedSegments = toolbox
          .getTaskActionClient()
          .submit(new SegmentListUsedAction(dataSource, interval, null));
      final Map segmentFileMap = toolbox.fetchSegments(usedSegments);
      VersionedIntervalTimeline timeline = new VersionedIntervalTimeline<>(
          Ordering.natural().nullsFirst()
      );

      for (DataSegment segment : usedSegments) {
        timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
      }
      final List> timeLineSegments = timeline.lookup(
          interval
      );

      final List dims;
      if (dimensions != null) {
        dims = dimensions;
      } else if (inputRowParser.getParseSpec().getDimensionsSpec().hasCustomDimensions()) {
        dims = inputRowParser.getParseSpec().getDimensionsSpec().getDimensionNames();
      } else {
        Set dimSet = Sets.newHashSet(
            Iterables.concat(
                Iterables.transform(
                    timeLineSegments,
                    new Function, Iterable>()
                    {
                      @Override
                      public Iterable apply(
                          TimelineObjectHolder timelineObjectHolder
                      )
                      {
                        return Iterables.concat(
                            Iterables.transform(
                                timelineObjectHolder.getObject(),
                                new Function, Iterable>()
                                {
                                  @Override
                                  public Iterable apply(PartitionChunk input)
                                  {
                                    return input.getObject().getDimensions();
                                  }
                                }
                            )
                        );
                      }
                    }

                )
            )
        );
        dims = Lists.newArrayList(
            Sets.difference(
                dimSet,
                inputRowParser
                    .getParseSpec()
                    .getDimensionsSpec()
                    .getDimensionExclusions()
            )
        );
      }

      final List metricsList;
      if (metrics != null) {
        metricsList = metrics;
      } else {
        Set metricsSet = Sets.newHashSet(
            Iterables.concat(
                Iterables.transform(
                    timeLineSegments,
                    new Function, Iterable>()
                    {
                      @Override
                      public Iterable apply(
                          TimelineObjectHolder input
                      )
                      {
                        return Iterables.concat(
                            Iterables.transform(
                                input.getObject(),
                                new Function, Iterable>()
                                {
                                  @Override
                                  public Iterable apply(PartitionChunk input)
                                  {
                                    return input.getObject().getMetrics();
                                  }
                                }
                            )
                        );
                      }
                    }
                )
            )
        );
        metricsList = Lists.newArrayList(metricsSet);
      }


      final List adapters = Lists.newArrayList(
          Iterables.concat(
              Iterables.transform(
                  timeLineSegments,
                  new Function, Iterable>()
                  {
                    @Override
                    public Iterable apply(final TimelineObjectHolder holder)
                    {
                      return
                          Iterables.transform(
                              holder.getObject(),
                              new Function, WindowedStorageAdapter>()
                              {
                                @Override
                                public WindowedStorageAdapter apply(final PartitionChunk input)
                                {
                                  final DataSegment segment = input.getObject();
                                  try {
                                    return new WindowedStorageAdapter(
                                        new QueryableIndexStorageAdapter(
                                            indexIO.loadIndex(
                                                Preconditions.checkNotNull(
                                                    segmentFileMap.get(segment),
                                                    "File for segment %s", segment.getIdentifier()
                                                )
                                            )
                                        ),
                                        holder.getInterval()
                                    );
                                  }
                                  catch (IOException e) {
                                    throw Throwables.propagate(e);
                                  }
                                }
                              }
                          );
                    }
                  }
              )
          )
      );

      return new IngestSegmentFirehose(adapters, dims, metricsList, dimFilter, Granularities.NONE);
    }
    catch (IOException e) {
      throw Throwables.propagate(e);
    }
    catch (SegmentLoadingException e) {
      throw Throwables.propagate(e);
    }

  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy