org.kitesdk.data.Datasets Maven / Gradle / Ivy
Show all versions of kite-data-core Show documentation
/*
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.data;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import java.net.URI;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.kitesdk.data.spi.DatasetRepository;
import org.kitesdk.data.spi.AbstractDataset;
import org.kitesdk.data.spi.Constraints;
import org.kitesdk.data.spi.Pair;
import org.kitesdk.data.spi.Registration;
/**
* Methods for working with {@link Dataset} instances.
*
* URIs
*
* All methods require a URI that identifies a dataset, view, or
* repository. The URI must begin with the scheme {@code dataset:},
* {@code view:}, or {@code repo:}. The remainder of the URI is
* implementation specific, depending on the dataset scheme.
*
* For example, the URI {@code dataset:hive:movies/ratings}
* references a dataset named ratings in the
* movies namespace, stored in Hive.
*
* The URI {@code view:hive:movies/ratings?year=2015&month=3}
* references a view of the same ratings dataset. The view
* is filtered to include records from only March, 2015.
*
* See the Kite documentation on dataset and view URIs for the available URI
* patterns.
*
* Dataset Descriptors
*
* Some methods require a {@link DatasetDescriptor} that encapsulates metadata
* about a dataset. Descriptors are built using a
* {@link DatasetDescriptor.Builder descriptor builder}.
*
* Entities
*
* Entities are analogous to records in database terminology.
* The term is used in the API to emphasize that an entity can include not
* only primitive objects, but also complex objects such as hash maps.
*
* Some methods accept an entity class that will be used by Kite when returning
* entities from a dataset or view.
*
* @since 0.8.0
*/
public class Datasets {
/**
* Load a {@link Dataset} or {@link View} for the given {@link URI}.
*
* URIs must begin with {@code dataset:} or {@code view:}. The remainder of
* the URI is implementation specific, depending on the dataset scheme.
*
* If you use a dataset URI, {@code load} returns the unfiltered dataset.
* If you use a view URI, {@code load} returns a {@code View} configured to
* read a subset of the dataset.
*
* @param uri a {@code Dataset} or {@code View} URI
* @param type a Java class that represents an entity in the dataset
* @param the type used for readers and writers created by this
* {@code Dataset}
* @param the type of {@code View} expected
* @return a {@code View} for the given URI
* @throws DatasetNotFoundException if there is no dataset for the given URI
* @throws NullPointerException if any arguments are {@code null}
* @throws IllegalArgumentException
* if {@code uri} is not a dataset or view URI
*/
@SuppressWarnings("unchecked")
public static > V load(URI uri, Class type) {
boolean isView = URIBuilder.VIEW_SCHEME.equals(uri.getScheme());
Preconditions.checkArgument(isView ||
URIBuilder.DATASET_SCHEME.equals(uri.getScheme()),
"Not a dataset or view URI: " + uri);
Preconditions.checkNotNull(type,
"The entity type can't be null, use Object.class to have the type"
+ " determined by the schema.");
Pair> pair =
Registration.lookupDatasetUri(URI.create(uri.getRawSchemeSpecificPart()));
DatasetRepository repo = pair.first();
Map uriOptions = pair.second();
Dataset dataset = repo.load(
uriOptions.get(URIBuilder.NAMESPACE_OPTION),
uriOptions.get(URIBuilder.DATASET_NAME_OPTION), type);
if (isView) {
return Datasets. view(dataset, uriOptions);
} else {
// if the URI isn't a view URI, only load the dataset
return (V) dataset;
}
}
/**
* Load a {@link Dataset} or {@link View} for the given {@link URI}.
*
* URIs must begin with {@code dataset:} or {@code view:}. The remainder of
* the URI is implementation specific, depending on the dataset scheme.
*
* If you use a dataset URI, {@code load} returns the unfiltered dataset.
* If you use a view URI, {@code load} returns a {@code View} configured to
* read a subset of the dataset.
*
* @param uri a {@code Dataset} or {@code View} URI
* @param the type of {@code View} expected
* @return a {@code View} for the given URI
* @throws DatasetNotFoundException if there is no dataset for the given URI
* @throws NullPointerException if any arguments are {@code null}
* @throws IllegalArgumentException
* if {@code uri} is not a dataset or view URI
*/
@SuppressWarnings("unchecked")
public static > V load(URI uri) {
return Datasets.load(uri, GenericRecord.class);
}
/**
* Load a {@link Dataset} or {@link View} for the given {@link URI}.
*
* URIs must begin with {@code dataset:} or {@code view:}. The remainder of
* the URI is implementation specific, depending on the dataset scheme.
*
* If you use a dataset URI, {@code load} returns the unfiltered dataset.
* If you use a view URI, {@code load} returns a {@code View} configured to
* read a subset of the dataset.
*
* @param uriString a {@code Dataset} or {@code View} URI
* @param type a Java class that represents an entity in the dataset
* @param the type used for readers and writers created by this
* {@code Dataset}
* @param the type of {@code View} expected
* @return a {@code View} for the given URI
* @throws DatasetNotFoundException if there is no dataset for the given URI
* @throws NullPointerException if any arguments are {@code null}
* @throws IllegalArgumentException
* if {@code uri} is not a dataset or view URI
*/
public static > V load(String uriString, Class type) {
return Datasets. load(URI.create(uriString), type);
}
/**
* Load a {@link Dataset} or {@link View} for the given {@link URI}.
*
* URIs must begin with {@code dataset:} or {@code view:}. The remainder of
* the URI is implementation specific, depending on the dataset scheme.
*
* If you use a dataset URI, {@code load} returns the unfiltered dataset.
* If you use a view URI, {@code load} returns a {@code View} configured to
* read a subset of the dataset.
*
* @param uriString a {@code Dataset} or {@code View} URI
* @param the type of {@code View} expected
* @return a {@code View} for the given URI
* @throws DatasetNotFoundException if there is no dataset for the given URI
* @throws NullPointerException if any arguments are {@code null}
* @throws IllegalArgumentException
* if {@code uri} is not a dataset or view URI
*/
public static > V load(String uriString) {
return Datasets.load(
uriString, GenericRecord.class);
}
/**
* Create a {@link Dataset} for the given dataset or view URI.
* {@code create} returns an empty dataset. You can use {@code DatasetWriter}
* to populate your dataset.
*
* URIs must begin with {@code dataset:} or {@code view:}. The remainder of
* the URI is implementation specific, depending on the dataset scheme. If the
* URI is a view URI, this method creates the underlying dataset and returns a
* view of it.
*
* @param uri a {@code Dataset} or {@code View} URI
* @param type a Java class that represents an entity in the dataset
* @param the type used for readers and writers created by this
* {@code Dataset}
* @param the type of {@code Dataset} or {@code View} expected
* @return a newly created {@code Dataset} responsible for the given URI
* @throws NullPointerException
* if {@code uri}, {@code descriptor}, or {@code type} is
* {@code null}
* @throws IllegalArgumentException
* if {@code uri} is not a dataset or view URI
* @throws DatasetExistsException
* if a {@code Dataset} for the given URI already exists
* @throws IncompatibleSchemaException
* if the schema is not compatible with existing datasets with
* shared storage (for example, in the same HBase table)
*/
@SuppressWarnings("unchecked")
public static > V create(URI uri, DatasetDescriptor descriptor, Class type) {
boolean isView = URIBuilder.VIEW_SCHEME.equals(uri.getScheme());
Preconditions.checkArgument(isView ||
URIBuilder.DATASET_SCHEME.equals(uri.getScheme()),
"Not a dataset or view URI: " + uri);
Preconditions.checkNotNull(type,
"The entity type can't be null, use Object.class to have the type"
+ " determined by the schema.");
Pair> pair =
Registration.lookupDatasetUri(URI.create(uri.getRawSchemeSpecificPart()));
DatasetRepository repo = pair.first();
Map uriOptions = pair.second();
if (descriptor.getLocation() == null && uriOptions.containsKey("location")) {
descriptor = new DatasetDescriptor.Builder(descriptor)
.location(uriOptions.get("location"))
.build();
}
Dataset dataset = repo.create(
uriOptions.get(URIBuilder.NAMESPACE_OPTION),
uriOptions.get(URIBuilder.DATASET_NAME_OPTION), descriptor, type);
if (isView) {
return Datasets. view(dataset, uriOptions);
} else {
return (V) dataset;
}
}
/**
* Create a {@link Dataset} for the given dataset or view URI.
* {@code create} returns an empty dataset. You can use {@code DatasetWriter}
* to populate your dataset.
*
* URIs must begin with {@code dataset:} or {@code view:}. The remainder of
* the URI is implementation specific, depending on the dataset scheme. If the
* URI is a view URI, this method creates the underlying dataset and returns a
* view of it.
*
* @param uri a {@code Dataset} or {@code View} URI
* @param the type of {@code Dataset} or {@code View} expected
* @return a newly created {@code Dataset} responsible for the given URI
* @throws NullPointerException
* if {@code uri} or {@code descriptor} is {@code null}
* @throws IllegalArgumentException
* if {@code uri} is not a dataset or view URI
* @throws DatasetExistsException
* if a {@code Dataset} for the given URI already exists
* @throws IncompatibleSchemaException
* if the schema is not compatible with existing datasets with
* shared storage (for example, in the same HBase table)
*/
@SuppressWarnings("unchecked")
public static > V create(URI uri, DatasetDescriptor descriptor) {
return Datasets.create(
uri, descriptor, GenericRecord.class);
}
/**
* Create a {@link Dataset} for the given dataset or view URI string.
* {@code create} returns an empty dataset. You can use {@code DatasetWriter}
* to populate your dataset.
*
* URIs must begin with {@code dataset:} or {@code view:}. The remainder of
* the URI is implementation specific, depending on the dataset scheme. If the
* URI is a view URI, this method creates the underlying dataset and returns a
* view of it.
*
* @param uri a {@code Dataset} or {@code View} URI string
* @param type a Java class that represents an entity in the dataset
* @param the type used for readers and writers created by this
* {@code Dataset}
* @param the type of {@code Dataset} or {@code View} expected
* @return a newly created {@code Dataset} responsible for the given URI
* @throws NullPointerException
* if {@code uri}, {@code descriptor}, or {@code type} is
* {@code null}
* @throws IllegalArgumentException
* if {@code uri} is not a dataset or view URI
* @throws DatasetExistsException
* if a {@code Dataset} for the given URI already exists
* @throws IncompatibleSchemaException
* if the schema is not compatible with existing datasets with
* shared storage (for example, in the same HBase table)
*/
public static > V create(String uri, DatasetDescriptor descriptor, Class type) {
return Datasets. create(URI.create(uri), descriptor, type);
}
/**
* Create a {@link Dataset} for the given dataset or view URI string.
* {@code create} returns an empty dataset. You can use {@code DatasetWriter}
* to populate your dataset.
*
* URIs must begin with {@code dataset:} or {@code view:}. The remainder of
* the URI is implementation specific, depending on the dataset scheme. If the
* URI is a view URI, this method creates the underlying dataset and returns a
* view of it.
*
* @param uri a {@code Dataset} or {@code View} URI string
* @param the type of {@code Dataset} or {@code View} expected
* @return a newly created {@code Dataset} responsible for the given URI
* @throws NullPointerException
* if {@code uri} or {@code descriptor} is {@code null}
* @throws IllegalArgumentException
* if {@code uri} is not a dataset or view URI
* @throws DatasetExistsException
* if a {@code Dataset} for the given URI already exists
* @throws IncompatibleSchemaException
* if the schema is not compatible with existing datasets with
* shared storage (for example, in the same HBase table)
*/
@SuppressWarnings("unchecked")
public static > V create(String uri, DatasetDescriptor descriptor) {
return Datasets.create(
uri, descriptor, GenericRecord.class);
}
/**
* Update a {@link Dataset} for the given dataset or view URI.
*
* You can add columns, remove columns, or change the data type of columns
* in your dataset, provided you don't attempt a change that is incompatible
* with written data. Avro defines rules for compatible schema evolution. See
* Schema
* Evolution.
*
* This method updates the dataset descriptor, so you can also add
* or change properties.
*
* The recommended way to update a dataset descriptor is to build it
* based on an existing descriptor. Use
* {@link DatasetDescriptor.Builder(DatasetDescriptor)} to
* build a DatasetDescriptor based on an existing instance.
*
* You cannot change a dataset format or partition strategy.
*
* URIs must begin with {@code dataset:}. The remainder of
* the URI is implementation specific, depending on the dataset scheme.
*
* @param uri a {@code Dataset} URI
* @param type a Java class that represents an entity in the dataset
* @param the type used for readers and writers created by this
* {@code Dataset}
* @param the type of {@code Dataset} expected
* @return a {@code Dataset} for the given URI
* @throws NullPointerException
* if {@code uri}, {@code descriptor}, or {@code type} is
* {@code null}
* @throws IllegalArgumentException if {@code uri} is not a dataset URI
* @throws DatasetNotFoundException
* if there is no dataset for the given URI
* @throws UnsupportedOperationException
* if descriptor updates are not supported by the implementation
* @throws ConcurrentSchemaModificationException
* if the {@code Dataset} schema is updated concurrently
* @throws IncompatibleSchemaException
* if the schema is not compatible with previous schemas, or with
* existing datasets with shared storage (for example, in the same
* HBase table)
*/
@SuppressWarnings("unchecked")
public static > D update(
URI uri, DatasetDescriptor descriptor, Class type) {
Preconditions.checkArgument(
URIBuilder.DATASET_SCHEME.equals(uri.getScheme()),
"Not a dataset or view URI: " + uri);
Preconditions.checkNotNull(type,
"The entity type can't be null, use Object.class to have the type"
+ " determined by the schema.");
Pair> pair =
Registration.lookupDatasetUri(URI.create(uri.getRawSchemeSpecificPart()));
DatasetRepository repo = pair.first();
Map uriOptions = pair.second();
return (D) repo.update(
uriOptions.get(URIBuilder.NAMESPACE_OPTION),
uriOptions.get(URIBuilder.DATASET_NAME_OPTION), descriptor, type);
}
/**
* Update a {@link Dataset} for the given dataset or view URI.
*
* You can add columns, remove columns, or change the data type of columns
* in your dataset, provided you don't attempt a change that is incompatible
* with written data. Avro defines rules for compatible schema evolution. See
* Schema
* Evolution.
*
* This method updates the dataset descriptor, so you can also add
* or change properties.
*
* The recommended way to update a dataset descriptor is to build it
* based on an existing descriptor. Use
* {@link DatasetDescriptor.Builder(DatasetDescriptor)} to
* build a DatasetDescriptor based on an existing instance.
*
* You cannot change a dataset format or partition strategy.
*
* URIs must begin with {@code dataset:}. The remainder of
* the URI is implementation specific, depending on the dataset scheme.
*
* @param uri a {@code Dataset} URI
* @param the type of {@code Dataset} expected
* @return a {@code Dataset} for the given URI
* @throws NullPointerException
* if {@code uri} or {@code descriptor} is {@code null}
* @throws IllegalArgumentException if {@code uri} is not a dataset URI
* @throws DatasetNotFoundException
* if there is no dataset for the given URI
* @throws UnsupportedOperationException
* if descriptor updates are not supported by the implementation
* @throws ConcurrentSchemaModificationException
* if the {@code Dataset} schema is updated concurrently
* @throws IncompatibleSchemaException
* if the schema is not compatible with previous schemas, or with
* existing datasets with shared storage (for example, in the same
* HBase table)
*/
@SuppressWarnings("unchecked")
public static > D update(
URI uri, DatasetDescriptor descriptor) {
return Datasets.update(
uri, descriptor, GenericRecord.class);
}
/**
* Update a {@link Dataset} for the given dataset or view URI string.
*
* You can add columns, remove columns, or change the data type of columns
* in your dataset, provided you don't attempt a change that is incompatible
* with written data. Avro defines rules for compatible schema evolution. See
* Schema
* Evolution.
*
* This method updates the dataset descriptor, so you can also add
* or change properties.
*
* The recommended way to update a dataset descriptor is to build it
* based on an existing descriptor. Use
* {@link DatasetDescriptor.Builder(DatasetDescriptor)} to
* build a DatasetDescriptor based on an existing instance.
*
* You cannot change a dataset format or partition strategy.
*
* URIs must begin with {@code dataset:}. The remainder of
* the URI is implementation specific, depending on the dataset scheme.
*
* @param uri a {@code Dataset} URI string
* @param type a Java class that represents an entity in the dataset
* @param the type used for readers and writers created by this
* {@code Dataset}
* @param the type of {@code Dataset} expected
* @return a {@code Dataset} for the given URI
* @throws NullPointerException
* if {@code uri}, {@code descriptor}, or {@code type} is
* {@code null}
* @throws IllegalArgumentException if {@code uri} is not a dataset URI
* @throws DatasetNotFoundException
* if there is no dataset for the given URI
* @throws UnsupportedOperationException
* if descriptor updates are not supported by the implementation
* @throws ConcurrentSchemaModificationException
* if the {@code Dataset} schema is updated concurrently
* @throws IncompatibleSchemaException
* if the schema is not compatible with previous schemas, or with
* existing datasets with shared storage (for example, in the same
* HBase table)
*/
public static > D update(String uri, DatasetDescriptor descriptor, Class type) {
return Datasets. update(URI.create(uri), descriptor, type);
}
/**
* Update a {@link Dataset} for the given dataset or view URI string.
*
* You can add columns, remove columns, or change the data type of columns
* in your dataset, provided you don't attempt a change that is incompatible
* with written data. Avro defines rules for compatible schema evolution. See
* Schema
* Evolution.
*
* This method updates the dataset descriptor, so you can also add
* or change properties.
*
* The recommended way to update a dataset descriptor is to build it
* based on an existing descriptor. Use
* {@link DatasetDescriptor.Builder(DatasetDescriptor)} to
* build a DatasetDescriptor based on an existing instance.
*
* You cannot change a dataset format or partition strategy.
*
* URIs must begin with {@code dataset:}. The remainder of
* the URI is implementation specific, depending on the dataset scheme.
*
* @param uri a {@code Dataset} URI string
* @param the type of {@code Dataset} expected
* @return a {@code Dataset} for the given URI
* @throws NullPointerException
* if {@code uri} or {@code descriptor} is {@code null}
* @throws IllegalArgumentException if {@code uri} is not a dataset URI
* @throws DatasetNotFoundException
* if there is no dataset for the given URI
* @throws UnsupportedOperationException
* if descriptor updates are not supported by the implementation
* @throws ConcurrentSchemaModificationException
* if the {@code Dataset} schema is updated concurrently
* @throws IncompatibleSchemaException
* if the schema is not compatible with previous schemas, or with
* existing datasets with shared storage (for example, in the same
* HBase table)
*/
public static > D update(String uri, DatasetDescriptor descriptor) {
return Datasets.update(
uri, descriptor, GenericRecord.class);
}
/**
* Delete a {@link Dataset} identified by the given dataset URI.
*
* When you call this method using a dataset URI, both data and metadata are
* deleted. After you call this method, the dataset no longer exists, unless
* an exception is thrown.
*
* When you call this method using a view URI, data in that view is deleted.
* The dataset's metadata is not changed. This can throw an
* {@code UnsupportedOperationException} if the delete requires additional
* work. For example, if some, but not all, of the data in an underlying data
* file must be removed, then the implementation is allowed to reject the
* deletion rather than copy the remaining records to a new file.
* An implementation must document under what conditions it accepts deletes,
* and under what conditions it rejects them.
*
* URIs must begin with {@code dataset:}. The remainder of
* the URI is implementation specific, depending on the dataset scheme.
*
* @param uri a {@code Dataset} URI
* @return {@code true} if any data or metadata is removed, {@code false}
* otherwise
* @throws NullPointerException if {@code uri} is null
* @throws IllegalArgumentException if {@code uri} is not a dataset URI
*/
public static boolean delete(URI uri) {
Preconditions.checkArgument(
URIBuilder.DATASET_SCHEME.equals(uri.getScheme()),
"Not a dataset URI: " + uri);
Pair> pair =
Registration.lookupDatasetUri(URI.create(uri.getRawSchemeSpecificPart()));
DatasetRepository repo = pair.first();
Map uriOptions = pair.second();
return repo.delete(
uriOptions.get(URIBuilder.NAMESPACE_OPTION),
uriOptions.get(URIBuilder.DATASET_NAME_OPTION));
}
/**
* Delete a {@link Dataset} identified by the given dataset URI string.
*
* When you call this method using a dataset URI, both data and metadata are
* deleted. After you call this method, the dataset no longer exists, unless
* an exception is thrown.
*
* When you call this method using a view URI, data in that view is deleted.
* The dataset's metadata is not changed. This can throw an
* {@code UnsupportedOperationException} if the delete requires additional
* work. For example, if some, but not all, of the data in an underlying data
* file must be removed, then the implementation is allowed to reject the
* deletion rather than copy the remaining records to a new file.
* An implementation must document under what conditions it accepts deletes,
* and under what conditions it rejects them.
*
* URIs must begin with {@code dataset:}. The remainder of
* the URI is implementation specific, depending on the dataset scheme.
*
* @param uri a {@code Dataset} URI string
* @return {@code true} if any data or metadata is removed, {@code false}
* otherwise
* @throws NullPointerException if {@code uri} is null
* @throws IllegalArgumentException if {@code uri} is not a dataset URI
*/
public static boolean delete(String uri) {
return delete(URI.create(uri));
}
/**
* Check whether a {@link Dataset} identified by the given URI exists.
*
* URIs must begin with {@code dataset:}. The remainder of
* the URI is implementation specific, depending on the dataset scheme.
*
* @param uri a {@code Dataset} URI
* @return {@code true} if the dataset exists, {@code false} otherwise
* @throws NullPointerException if {@code uri} is null
* @throws IllegalArgumentException if {@code uri} is not a dataset URI
*/
public static boolean exists(URI uri) {
Preconditions.checkArgument(
URIBuilder.DATASET_SCHEME.equals(uri.getScheme()),
"Not a dataset URI: " + uri);
Pair> pair =
Registration.lookupDatasetUri(URI.create(uri.getRawSchemeSpecificPart()));
DatasetRepository repo = pair.first();
Map uriOptions = pair.second();
return repo.exists(
uriOptions.get(URIBuilder.NAMESPACE_OPTION),
uriOptions.get(URIBuilder.DATASET_NAME_OPTION));
}
/**
* Check whether a {@link Dataset} identified by the given URI string exists.
*
* URIs must begin with {@code dataset:}. The remainder of
* the URI is implementation specific, depending on the dataset scheme.
*
* @param uri a {@code Dataset} URI string
* @return {@code true} if the dataset exists, {@code false} otherwise
* @throws NullPointerException if {@code uri} is null
* @throws IllegalArgumentException if {@code uri} is not a dataset URI
*/
public static boolean exists(String uri) {
return exists(URI.create(uri));
}
/**
* List the {@link Dataset} URIs in the repository identified by the URI.
*
* URI formats are defined by {@code Dataset} implementations. The repository
* URIs you pass to this method must begin with {@code repo:}. For example, to
* list the {@code Dataset} URIs for the Hive repository, provide the URI
* {@code repo:hive}.
*
* @param uri a {@code DatasetRepository} URI
* @return the URIs present in the {@code DatasetRepository}
* @throws NullPointerException if {@code uri} is null
* @throws IllegalArgumentException if {@code uri} is not a repository URI
*/
public static Collection list(URI uri) {
boolean isRepo = URIBuilder.REPO_SCHEME.equals(uri.getScheme());
Preconditions.checkArgument(isRepo, "Not a repository URI: " + uri);
DatasetRepository repo = Registration
.open(URI.create(uri.getRawSchemeSpecificPart()));
// build a URI for each dataset name
URI repoUri = repo.getUri();
List datasets = Lists.newArrayList();
for (String namespace : repo.namespaces()) {
for (String dataset : repo.datasets(namespace)) {
datasets.add(new URIBuilder(repoUri, namespace, dataset).build());
}
}
return datasets;
}
/**
* List the {@link Dataset} URIs in the repository identified by the URI
* string.
*
* URI formats are defined by {@code Dataset} implementations. The repository
* URIs you pass to this method must begin with {@code repo:}. For example, to
* list the {@code Dataset} URIs for the Hive repository, provide the URI
* {@code repo:hive}.
*
* @param uri a {@code DatasetRepository} URI string
* @return the URIs present in the {@code DatasetRepository}
* @throws NullPointerException if {@code URI} is null
* @throws IllegalArgumentException if {@code uri} is not a repository URI
*/
public static Collection list(String uri) {
return list(URI.create(uri));
}
@SuppressWarnings("unchecked")
private static > V view(Dataset dataset,
Map uriOptions) {
if (dataset instanceof AbstractDataset) {
DatasetDescriptor descriptor = dataset.getDescriptor();
Schema schema = descriptor.getSchema();
PartitionStrategy strategy = null;
if (descriptor.isPartitioned()) {
strategy = descriptor.getPartitionStrategy();
}
Constraints constraints = Constraints.fromQueryMap(
schema, strategy, uriOptions);
return (V) ((AbstractDataset) dataset).filter(constraints);
} else {
return (V) dataset;
}
}
}