io.druid.indexing.firehose.IngestSegmentFirehoseFactory
/*
* Druid - a distributed column store.
* Copyright 2012 - 2015 Metamarkets Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.druid.indexing.firehose;

import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
import com.google.inject.Injector;
import com.metamx.common.parsers.ParseException;
import com.metamx.emitter.EmittingLogger;
import io.druid.data.input.Firehose;
import io.druid.data.input.FirehoseFactory;
import io.druid.data.input.impl.InputRowParser;
import io.druid.granularity.QueryGranularity;
import io.druid.indexing.common.TaskToolbox;
import io.druid.indexing.common.TaskToolboxFactory;
import io.druid.indexing.common.actions.SegmentListUsedAction;
import io.druid.indexing.common.task.NoopTask;
import io.druid.query.filter.DimFilter;
import io.druid.segment.IndexIO;
import io.druid.segment.QueryableIndexStorageAdapter;
import io.druid.segment.loading.SegmentLoadingException;
import io.druid.segment.realtime.firehose.IngestSegmentFirehose;
import io.druid.segment.realtime.firehose.WindowedStorageAdapter;
import io.druid.timeline.DataSegment;
import io.druid.timeline.TimelineObjectHolder;
import io.druid.timeline.VersionedIntervalTimeline;
import io.druid.timeline.partition.PartitionChunk;
import org.joda.time.Interval;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class IngestSegmentFirehoseFactory implements FirehoseFactory<InputRowParser>
{
  private static final EmittingLogger log = new EmittingLogger(IngestSegmentFirehoseFactory.class);

  private final String dataSource;
  private final Interval interval;
  private final DimFilter dimFilter;
  private final List<String> dimensions;
  private final List<String> metrics;
  private final Injector injector;

  @JsonCreator
  public IngestSegmentFirehoseFactory(
      @JsonProperty("dataSource") final String dataSource,
      @JsonProperty("interval") Interval interval,
      @JsonProperty("filter") DimFilter dimFilter,
      @JsonProperty("dimensions") List<String> dimensions,
      @JsonProperty("metrics") List<String> metrics,
      @JacksonInject Injector injector
  )
  {
    Preconditions.checkNotNull(dataSource, "dataSource");
    Preconditions.checkNotNull(interval, "interval");
    this.dataSource = dataSource;
    this.interval = interval;
    this.dimFilter = dimFilter;
    this.dimensions = dimensions;
    this.metrics = metrics;
    this.injector = injector;
  }

  @JsonProperty
  public String getDataSource()
  {
    return dataSource;
  }

  @JsonProperty
  public Interval getInterval()
  {
    return interval;
  }

  @JsonProperty("filter")
  public DimFilter getDimensionsFilter()
  {
    return dimFilter;
  }

  @JsonProperty
  public List<String> getDimensions()
  {
    return dimensions;
  }

  @JsonProperty
  public List<String> getMetrics()
  {
    return metrics;
  }

  @Override
  public Firehose connect(InputRowParser inputRowParser) throws IOException, ParseException
  {
    log.info("Connecting firehose: dataSource[%s], interval[%s]", dataSource, interval);
    // A better way to achieve this would be to pass the toolbox to the Firehose; the instance is
    // initialized lazily in the connect() method. The NoopTask is used only to create the toolbox
    // and list segments.
    final TaskToolbox toolbox = injector.getInstance(TaskToolboxFactory.class).build(
        new NoopTask("reingest", 0, 0, null, null, null)
    );
    try {
      final List<DataSegment> usedSegments = toolbox
          .getTaskActionClient()
          .submit(new SegmentListUsedAction(dataSource, interval));
      final Map<DataSegment, File> segmentFileMap = toolbox.fetchSegments(usedSegments);
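      // Arrange the used segments in a versioned timeline so that lookup() below returns only the
      // visible (non-overshadowed) chunks for the requested interval.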
      VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(
          Ordering.<String>natural().nullsFirst()
      );
      for (DataSegment segment : usedSegments) {
        timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
      }
      final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = timeline.lookup(interval);
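      // Choose the dimensions to read: an explicit list wins, then the parser's custom dimensions,
      // and otherwise the union of dimensions across all visible segments minus the parser's exclusions.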
      final List<String> dims;
      if (dimensions != null) {
        dims = dimensions;
      } else if (inputRowParser.getParseSpec().getDimensionsSpec().hasCustomDimensions()) {
        dims = inputRowParser.getParseSpec().getDimensionsSpec().getDimensions();
      } else {
        Set<String> dimSet = Sets.newHashSet(
            Iterables.concat(
                Iterables.transform(
                    timeLineSegments,
                    new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>()
                    {
                      @Override
                      public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> timelineObjectHolder)
                      {
                        return Iterables.concat(
                            Iterables.transform(
                                timelineObjectHolder.getObject(),
                                new Function<PartitionChunk<DataSegment>, Iterable<String>>()
                                {
                                  @Override
                                  public Iterable<String> apply(PartitionChunk<DataSegment> input)
                                  {
                                    return input.getObject().getDimensions();
                                  }
                                }
                            )
                        );
                      }
                    }
                )
            )
        );
        dims = Lists.newArrayList(
            Sets.difference(
                dimSet,
                inputRowParser.getParseSpec().getDimensionsSpec().getDimensionExclusions()
            )
        );
      }
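      // Choose the metrics to read: an explicit list wins; otherwise take the union of metrics
      // across all visible segments.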
      final List<String> metricsList;
      if (metrics != null) {
        metricsList = metrics;
      } else {
        Set<String> metricsSet = Sets.newHashSet(
            Iterables.concat(
                Iterables.transform(
                    timeLineSegments,
                    new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>()
                    {
                      @Override
                      public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> input)
                      {
                        return Iterables.concat(
                            Iterables.transform(
                                input.getObject(),
                                new Function<PartitionChunk<DataSegment>, Iterable<String>>()
                                {
                                  @Override
                                  public Iterable<String> apply(PartitionChunk<DataSegment> input)
                                  {
                                    return input.getObject().getMetrics();
                                  }
                                }
                            )
                        );
                      }
                    }
                )
            )
        );
        metricsList = Lists.newArrayList(metricsSet);
      }
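      // Load each visible segment from its fetched file and expose it as a storage adapter,
      // windowed to the interval for which that segment is visible in the timeline.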
      final List<WindowedStorageAdapter> adapters = Lists.newArrayList(
          Iterables.concat(
              Iterables.transform(
                  timeLineSegments,
                  new Function<TimelineObjectHolder<String, DataSegment>, Iterable<WindowedStorageAdapter>>()
                  {
                    @Override
                    public Iterable<WindowedStorageAdapter> apply(final TimelineObjectHolder<String, DataSegment> holder)
                    {
                      return Iterables.transform(
                          holder.getObject(),
                          new Function<PartitionChunk<DataSegment>, WindowedStorageAdapter>()
                          {
                            @Override
                            public WindowedStorageAdapter apply(final PartitionChunk<DataSegment> input)
                            {
                              final DataSegment segment = input.getObject();
                              try {
                                return new WindowedStorageAdapter(
                                    new QueryableIndexStorageAdapter(
                                        IndexIO.loadIndex(
                                            Preconditions.checkNotNull(
                                                segmentFileMap.get(segment),
                                                "File for segment %s", segment.getIdentifier()
                                            )
                                        )
                                    ),
                                    holder.getInterval()
                                );
                              }
                              catch (IOException e) {
                                throw Throwables.propagate(e);
                              }
                            }
                          }
                      );
                    }
                  }
              )
          )
      );
      return new IngestSegmentFirehose(adapters, dims, metricsList, dimFilter, QueryGranularity.NONE);
    }
    catch (IOException e) {
      throw Throwables.propagate(e);
    }
    catch (SegmentLoadingException e) {
      throw Throwables.propagate(e);
    }
  }
}
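
For context, here is a minimal usage sketch of the factory above. It is an illustration, not part of the original file: the `injector` would normally be supplied by Druid's Guice wiring and the `parser` by the task's ingestion spec, and the dataSource name and interval below are made-up examples.

// Hedged usage sketch (not from the original source). Assumes a Guice `injector` and an
// InputRowParser `parser` are available from the surrounding Druid runtime.
IngestSegmentFirehoseFactory factory = new IngestSegmentFirehoseFactory(
    "wikipedia",                              // hypothetical dataSource whose segments are re-read
    new Interval("2014-01-01/2014-01-02"),    // interval of segments to ingest (Joda ISO8601 form)
    null,                                     // filter: null reads all rows
    null,                                     // dimensions: null derives them from the parser/segments
    null,                                     // metrics: null derives them from the segments
    injector                                  // normally supplied via @JacksonInject
);
Firehose firehose = factory.connect(parser);  // builds the toolbox, fetches segments, and starts reading rows

Passing null for dimensions and metrics exercises the fallback logic in connect(): the lists are derived from the parser spec or from the union of columns across the visible segments, as the code above shows.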