All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.bazaarvoice.emodb.hadoop.io.LocationUtil Maven / Gradle / Ivy

There is a newer version: 6.2.3
Show newest version
package com.bazaarvoice.emodb.hadoop.io;

import com.bazaarvoice.curator.ResolvingEnsembleProvider;
import com.bazaarvoice.emodb.sor.client.DataStoreClient;
import com.bazaarvoice.emodb.sor.client.DataStoreFixedHostDiscoverySource;
import com.bazaarvoice.ostrich.HostDiscovery;
import com.bazaarvoice.ostrich.discovery.zookeeper.ZooKeeperHostDiscovery;
import com.codahale.metrics.MetricRegistry;
import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Objects;
import com.google.common.base.Optional;
import com.google.common.base.Predicate;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Ordering;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.retry.BoundedExponentialBackoffRetry;
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;

import javax.ws.rs.core.UriBuilder;
import java.net.URI;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static java.lang.String.format;

/**
 * Locations take take one of the following forms (all starting with "emodb://"):
 *
 * ci.us/tablename
 * ci.us.group/tablename
 * ci.us/tablename?zkConnectString=zookeeper-connect-string
 * ci.us/tablename?host=hostname
 *
 * For local host:
 *
 * local/tablename
 * local.group/tablename
 * local/tablename?zkConnectString=zookeeper-connect-string
 * local/tablename?host=hostname
 *
 * Alternately the caller can provide a direct emodb URL, such as "emodb://localhost:8080/tablename"
 */
public class LocationUtil {
    private static final String DEFAULT_ZK_CONNECTION_STRING = "zookeeper1:2181,zookeeper2:2181,zookeeper3:2181";
    private static final String DEFAULT_LOCAL_ZK_CONNECTION_STRING = "localhost:2181";
    private static final String DEFAULT_REGION = "us-east-1";
    private static final String DEFAULT_GROUP = "default";

    private static final String EMODB_SCHEME = "emodb";
    private static final String STASH_SCHEME = "emostash";
    private static final String ZK_CONNECTION_STRING_PARAM = "zkConnectionString";
    private static final String HOST_PARAM = "host";

    private static final Pattern LOCATOR_PATTERN = Pattern.compile(
            "^((?ci|cert|anon|uat|bazaar)(\\.(?us|us-east-1|eu|eu-west-1))?|local)" +
            "(\\.(?.+?))?$");

    // Supported regions
    private static enum Region {
        us ("us-east-1", "us"),
        eu ("eu-west-1", "eu");

        private final String _defaultAlias;
        private final Set _aliases;

        private Region(String... aliases) {
            _defaultAlias = aliases[0];
            _aliases = ImmutableSet.copyOf(aliases);
        }

        public Set getAliases() {
            return _aliases;
        }

        public String toString() {
            return _defaultAlias;
        }
    }

    private static Region getRegion(String value) {
        for (Region region : Region.values()) {
            if (region.getAliases().contains(value)) {
                return region;
            }
        }
        throw new IllegalArgumentException("Invalid region " + value);
    }

    /**
     * Returns the location type from a location URI.
     */
    public static LocationType getLocationType(URI location) {
        String scheme = location.getScheme();
        if (EMODB_SCHEME.equals(scheme)) {
            // If the host matches the locator pattern then assume host discovery, otherwise it's a
            // direct URL to an EmoDB server
            if (LOCATOR_PATTERN.matcher(location.getHost()).matches()) {
                return LocationType.EMO_HOST_DISCOVERY;
            } else {
                return LocationType.EMO_URL;
            }
        } else if (STASH_SCHEME.equals(scheme)) {
            return LocationType.STASH;
        }

        throw new IllegalArgumentException("Invalid location: " + location);
    }

    private static Matcher getLocatorMatcher(URI location) {
        return LOCATOR_PATTERN.matcher(location.getHost());
    }

    /**
     * Converts a location URI to a data source identifier.
     */
    public static String getDataStoreIdentifier(URI location, String apiKey) {
        checkArgument(getLocationType(location) != LocationType.STASH, "Stash locations do not have a data source ID");

        UriBuilder uriBuilder = UriBuilder.fromUri(location)
                .userInfo(apiKey)
                .replacePath(null)
                .replaceQuery(null);

        if (getLocationType(location) == LocationType.EMO_HOST_DISCOVERY) {
            Optional zkConnectionStringOverride = getZkConnectionStringOverride(location);
            if (zkConnectionStringOverride.isPresent()) {
                uriBuilder.queryParam(ZK_CONNECTION_STRING_PARAM, zkConnectionStringOverride.get());
            }
            Optional> hosts = getHostOverride(location);
            if (hosts.isPresent()) {
                for (String host : hosts.get()) {
                    uriBuilder.queryParam(HOST_PARAM, host);
                }
            }
        }
        return uriBuilder.build().toString();
    }

    public static Optional getZkConnectionStringOverride(URI location) {
        List params = URLEncodedUtils.parse(location, Charsets.UTF_8.name());
        for (NameValuePair pair : params) {
            if (ZK_CONNECTION_STRING_PARAM.equals(pair.getName())) {
                // Deterministically sort the hosts
                String[] hosts = pair.getValue().split(",");
                Arrays.sort(hosts);
                String zkConnectionString = Joiner.on(",").join(hosts);
                return Optional.of(zkConnectionString);
            }
        }
        return Optional.absent();
    }

    public static URI setZkConnectionStringOverride(URI location, String zkConnectionString) {
        return UriBuilder.fromUri(location)
                .replaceQueryParam(ZK_CONNECTION_STRING_PARAM, zkConnectionString)
                .build();
    }

    public static Optional> getHostOverride(URI location) {
        List params = URLEncodedUtils.parse(location, Charsets.UTF_8.name());
        List hosts = FluentIterable.from(params)
                .filter(new Predicate() {
                    public boolean apply(NameValuePair pair) {
                        return HOST_PARAM.equals(pair.getName());
                    }
                })
                .transform(new Function() {
                    public String apply(NameValuePair pair) {
                        return pair.getValue();
                    }
                })
                .toSortedList(Ordering.natural());

        return hosts.isEmpty() ? Optional.>absent() : Optional.of(hosts);
    }

    public static URI setHostsOverride(URI location, String... hosts) {
        return UriBuilder.fromUri(location)
                .replaceQueryParam(HOST_PARAM, hosts)
                .build();
    }

    /**
     * Returns a configured, started Curator for a given location, or absent if the location does not
     * use host discovery.
     */
    public static Optional getCuratorForLocation(URI location) {
        final String defaultConnectionString;
        final String namespace;

        if (getLocationType(location) != LocationType.EMO_HOST_DISCOVERY) {
            // Only host discovery may require ZooKeeper
            return Optional.absent();
        }

        if (getHostOverride(location).isPresent()) {
            // Fixed host discovery doesn't require ZooKeeper
            return Optional.absent();
        }

        Matcher matcher = getLocatorMatcher(location);
        checkArgument(matcher.matches(), "Invalid location: %s", location);

        if (matcher.group("universe") != null) {
            // Normal host discovery
            String universe = matcher.group("universe");
            Region region = getRegion(Objects.firstNonNull(matcher.group("region"), DEFAULT_REGION));
            namespace = format("%s/%s", universe, region);
            defaultConnectionString = DEFAULT_ZK_CONNECTION_STRING;
        } else {
            // Local host discovery; typically for developer testing
            namespace = null;
            defaultConnectionString = DEFAULT_LOCAL_ZK_CONNECTION_STRING;
        }

        String connectionString = getZkConnectionStringOverride(location).or(defaultConnectionString);

        CuratorFramework curator = CuratorFrameworkFactory.builder()
                .ensembleProvider(new ResolvingEnsembleProvider(connectionString))
                .retryPolicy(new BoundedExponentialBackoffRetry(100, 1000, 10))
                .threadFactory(new ThreadFactoryBuilder().setNameFormat("emo-zookeeper-%d").build())
                .namespace(namespace)
                .build();

        curator.start();

        return Optional.of(curator);
    }

    /**
     * Returns the EmoDB cluster name associated with the given location.
     */
    public static String getClusterForLocation(URI location) {
        Matcher matcher = getLocatorMatcher(location);
        checkArgument(matcher.matches(), "Invalid location: %s", location);

        final String clusterPrefix;

        if (matcher.group("universe") != null) {
            clusterPrefix = matcher.group("universe");
        } else {
            clusterPrefix = "local";
        }

        String group = Objects.firstNonNull(matcher.group("group"), DEFAULT_GROUP);
        return format("%s_%s", clusterPrefix, group);
    }

    /**
     * Returns the HostDiscovery for a given location.  The curator should come from a previous call to
     * {@link #getCuratorForLocation(java.net.URI)} to the same location.
     */
    public static HostDiscovery getHostDiscoveryForLocation(URI location, Optional curator, String serviceName, MetricRegistry metricRegistry) {
        checkArgument(getLocationType(location) == LocationType.EMO_HOST_DISCOVERY, "Location does not use host discovery");
        Optional> hosts = getHostOverride(location);
        if (hosts.isPresent()) {
            return createFixedHostDiscovery(serviceName, hosts.get().toArray(new String[hosts.get().size()]));
        } else {
            checkState(curator.isPresent(), "curator required");
            return createZooKeeperHostDiscovery(curator.get(), serviceName, metricRegistry);
        }
    }

    private static HostDiscovery createZooKeeperHostDiscovery(CuratorFramework curator, String serviceName, MetricRegistry metricRegistry) {
        return new ZooKeeperHostDiscovery(curator, serviceName, metricRegistry);
    }

    private static HostDiscovery createFixedHostDiscovery(String serviceName, String... hosts) {
        return new DataStoreFixedHostDiscoverySource(hosts).forService(serviceName);
    }

    /**
     * Returns the base URI for a location of type {@link LocationType#EMO_URL}.
     */
    public static URI getBaseUriForLocation(URI location) {
        // Any request to the public ELBs must be made over https
        boolean useHttps = location.getHost().toLowerCase().endsWith("nexus.bazaarvoice.com");

        return UriBuilder.fromUri(location)
                .scheme(useHttps ? "https" : "http")
                .replacePath(DataStoreClient.SERVICE_PATH)
                .replaceQuery(null)
                .build();
    }

    /**
     * Returns a location URI from a source and table name.
     */
    public static URI toLocation(String source, String table) {
        // Verify the source is a valid EmoDB URI
        URI sourceUri = URI.create(source);
        return toLocation(sourceUri, table);
    }

    /**
     * Returns a Location URI from a source URI and table name.
     */
    public static URI toLocation(URI sourceUri, String table) {
        getLocationType(sourceUri); // Raises an exception if the location type is invalid.
        return UriBuilder.fromUri(sourceUri).path(table).build();
    }

    /**
     * Returns location information for a given location of type {@link LocationType#STASH}.
     */
    public static StashLocation getStashLocation(URI location) {
        checkArgument(getLocationType(location) == LocationType.STASH, "Not a stash location");

        String host, path;
        boolean useLatestDirectory;

        Matcher matcher = getLocatorMatcher(location);
        if (matcher.matches()) {
            // Stash is only available for the default group.  Make sure that's the case here
            String group = Objects.firstNonNull(matcher.group("group"), DEFAULT_GROUP);
            checkArgument(DEFAULT_GROUP.equals(group), "Stash not available for group: %s", group);

            // Stash it not available for local
            String universe = matcher.group("universe");
            checkArgument(universe != null, "Stash not available for local");

            Region region = getRegion(Objects.firstNonNull(matcher.group("region"), DEFAULT_REGION));

            host = getS3BucketForRegion(region);
            path = getS3PathForUniverse(universe);
            useLatestDirectory = true;
        } else {
            // The location refers to the actual S3 bucket and path
            host = location.getHost();
            path = location.getPath();
            // Stash paths don't have a leading slash
            if (path != null && path.startsWith("/")) {
                path = path.substring(1);
            }
            useLatestDirectory = false;
        }

        return new StashLocation(host, path, useLatestDirectory);
    }

    private static String getS3BucketForRegion(Region region) {
        return format("emodb-%s", region);
    }

    private static String getS3PathForUniverse(String universe) {
        return format("stash/%s", universe);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy