// Source listing: io.druid.server.lookup.namespace.URIExtractionNamespaceCacheFactory
// Artifact: druid-lookups-cached-global (Maven / Gradle / Ivy)
// Extension to rename Druid dimension values using namespaces.
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.server.lookup.namespace;
import com.google.common.base.Throwables;
import com.google.common.io.ByteSource;
import com.google.inject.Inject;
import com.metamx.common.CompressionUtils;
import com.metamx.common.IAE;
import com.metamx.common.RetryUtils;
import com.metamx.common.logger.Logger;
import io.druid.data.SearchableVersionedDataFinder;
import io.druid.data.input.MapPopulator;
import io.druid.query.lookup.namespace.ExtractionNamespaceCacheFactory;
import io.druid.query.lookup.namespace.URIExtractionNamespace;
import io.druid.segment.loading.URIDataPuller;
import javax.annotation.Nullable;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.regex.Pattern;
/**
 * Builds cache populators for {@link URIExtractionNamespace} lookups.
 *
 * <p>The populator produced by {@link #getCachePopulator} picks a data puller by the scheme of the
 * namespace's URI (or URI prefix), optionally searches the prefix for the latest matching file, and
 * then loads that file into the supplied cache map. Paths recognized by
 * {@code CompressionUtils.isGz} are read through a gzip stream. If the version reported by the puller
 * equals the previously loaded version, nothing is loaded and the previous version is returned.
 */
public class URIExtractionNamespaceCacheFactory implements ExtractionNamespaceCacheFactory<URIExtractionNamespace>
{
  private static final int DEFAULT_NUM_RETRIES = 3;
  private static final Logger log = new Logger(URIExtractionNamespaceCacheFactory.class);

  // Looked up by URI scheme in getCachePopulator. The value type is deliberately left without a type
  // argument: the injected type must match whatever the DI binding declares.
  // NOTE(review): confirm the binding is Map<String, SearchableVersionedDataFinder>.
  private final Map<String, SearchableVersionedDataFinder> pullers;

  /**
   * @param pullers data finders keyed by URI scheme
   */
  @Inject
  public URIExtractionNamespaceCacheFactory(
      Map<String, SearchableVersionedDataFinder> pullers
  )
  {
    this.pullers = pullers;
  }

  /**
   * Creates a task that (re)loads the namespace data into {@code cache}.
   *
   * @param id                  namespace identifier, used only in log messages
   * @param extractionNamespace namespace definition supplying the URI or URI prefix, the optional file
   *                            regex and the parse spec
   * @param lastVersion         version returned by the previous successful load, or null if none
   * @param cache               map that receives the parsed key/value pairs
   *
   * @return a callable yielding the version of the data now in the cache; this is {@code lastVersion}
   * itself when the source reports the same version and the load is skipped
   *
   * @throws IAE              (from the callable) if no puller is registered for the URI scheme, or the
   *                          registered one is not a {@link URIDataPuller}
   * @throws RuntimeException (from the callable) wrapping a {@link FileNotFoundException} if a prefix
   *                          search finds nothing, or propagating any failure of the load itself
   */
  @Override
  public Callable<String> getCachePopulator(
      final String id,
      final URIExtractionNamespace extractionNamespace,
      @Nullable final String lastVersion,
      final Map<String, String> cache
  )
  {
    return new Callable<String>()
    {
      @Override
      public String call()
      {
        final boolean doSearch = extractionNamespace.getUriPrefix() != null;
        final URI originalUri = doSearch ? extractionNamespace.getUriPrefix() : extractionNamespace.getUri();
        // Unchecked conversion from the raw map value; the finder is only ever handed URIs here.
        final SearchableVersionedDataFinder<URI> pullerRaw = pullers.get(originalUri.getScheme());
        if (pullerRaw == null) {
          throw new IAE(
              "Unknown loader type[%s].  Known types are %s",
              originalUri.getScheme(),
              pullers.keySet()
          );
        }
        if (!(pullerRaw instanceof URIDataPuller)) {
          throw new IAE(
              "Cannot load data from location [%s]. Data pulling from [%s] not supported",
              originalUri,
              originalUri.getScheme()
          );
        }
        final URIDataPuller puller = (URIDataPuller) pullerRaw;
        final URI uri = doSearch
                        ? findLatestUri(pullerRaw, originalUri, extractionNamespace)
                        : extractionNamespace.getUri();

        try {
          return RetryUtils.retry(
              new Callable<String>()
              {
                @Override
                public String call() throws Exception
                {
                  final String version = puller.getVersion(uri);
                  // Important to call equals() against version because lastVersion could be null
                  if (version.equals(lastVersion)) {
                    log.debug(
                        "URI [%s] for namespace [%s] has the same last modified time [%s] as the last cached. " +
                        "Skipping ",
                        uri.toString(),
                        id,
                        version
                    );
                    return lastVersion;
                  }
                  final ByteSource source = byteSourceFor(puller, uri);
                  final long lineCount = new MapPopulator<>(
                      extractionNamespace.getNamespaceParseSpec()
                                         .getParser()
                  ).populate(source, cache);
                  log.info(
                      "Finished loading %d lines for namespace [%s]",
                      lineCount,
                      id
                  );
                  return version;
                }
              },
              puller.shouldRetryPredicate(),
              DEFAULT_NUM_RETRIES
          );
        }
        catch (Exception e) {
          if (e instanceof InterruptedException) {
            // Keep the interruption visible to whoever runs this callable.
            Thread.currentThread().interrupt();
          }
          throw Throwables.propagate(e);
        }
      }
    };
  }

  /**
   * Asks the finder for the latest entry under {@code uriPrefix}, filtered by the namespace's file
   * regex when one is configured (a null pattern is passed otherwise).
   *
   * @throws RuntimeException wrapping a {@link FileNotFoundException} if the finder returns null
   */
  private static URI findLatestUri(
      final SearchableVersionedDataFinder<URI> finder,
      final URI uriPrefix,
      final URIExtractionNamespace extractionNamespace
  )
  {
    final String fileRegex = extractionNamespace.getFileRegex();
    final Pattern versionRegex = fileRegex == null ? null : Pattern.compile(fileRegex);
    final URI latest = finder.getLatestVersion(uriPrefix, versionRegex);
    if (latest == null) {
      throw new RuntimeException(
          new FileNotFoundException(
              String.format(
                  "Could not find match for pattern `%s` in [%s] for %s",
                  versionRegex,
                  uriPrefix,
                  extractionNamespace
              )
          )
      );
    }
    return latest;
  }

  /**
   * Returns a source that opens a fresh stream from the puller on every call, wrapped in a gzip
   * stream when the URI path is recognized as gzip by {@code CompressionUtils.isGz}.
   */
  private static ByteSource byteSourceFor(final URIDataPuller puller, final URI uri)
  {
    if (CompressionUtils.isGz(uri.getPath())) {
      // Simple gzip stream
      log.debug("Loading gz");
      return new ByteSource()
      {
        @Override
        public InputStream openStream() throws IOException
        {
          return CompressionUtils.gzipInputStream(puller.getInputStream(uri));
        }
      };
    }
    return new ByteSource()
    {
      @Override
      public InputStream openStream() throws IOException
      {
        return puller.getInputStream(uri);
      }
    };
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy