Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
A table. This is represented as a directory in the file system.
*
*
/table_name/split
*
A split. A split is a file in the file system.
*
*
* Note that the status for splits can be obtained from the file system, but the splits themselves cannot be opened.
* The caller must use one of the EmoInputFormat classes to actually read a split file's contents.
*/
public class EmoFileSystem extends FileSystem implements EmoInputSplittable {
// Base URI of this file system; assigned in initialize() from the location with its path replaced by "/".
private URI _uri;
// API key read from the Hadoop configuration in initialize() and passed along when obtaining a data store.
private String _apiKey;
// Root path of the file system, built from _uri in initialize().
private Path _rootPath;
// Split size obtained from BaseInputFormat.getSplitSize(conf) in initialize(); used when requesting splits.
private int _splitSize;
// Metric registry owned by this instance (created in the constructor) and passed along when obtaining a data store.
private MetricRegistry _metricRegistry;
/**
 * Creates the file system with its own metric registry.  The remaining state is populated later by
 * {@code initialize(URI, Configuration)}.
 */
public EmoFileSystem() {
    // This class is meant to run outside of a Dropwizard server, so there is no environment-provided
    // MetricRegistry to borrow; a dedicated instance is created instead.
    this._metricRegistry = new MetricRegistry();
}
/**
 * Returns the URI scheme served by this file system.
 *
 * @return the constant scheme name {@code "emodb"}
 */
@Override
public String getScheme() {
    final String scheme = "emodb";
    return scheme;
}
/**
 * Initializes this file system from a location URI and a Hadoop configuration.
 *
 * <p>If the location does not itself carry a ZooKeeper connection string override or a host override, the
 * corresponding values are taken from the configuration when present.  The file system URI is then the
 * resulting location with its path replaced by {@code "/"}; the API key and the split size are read from
 * the configuration.
 *
 * @param location the location URI this file system is being created for
 * @param conf     the Hadoop configuration supplying fallback overrides, the API key and the split size
 * @throws IOException if the superclass initialization fails
 */
@Override
public void initialize(URI location, Configuration conf)
        throws IOException {
    super.initialize(location, conf);

    // Both overrides are read from the ORIGINAL location before it is modified below.  Only their presence
    // is inspected, so the element types are left as wildcards.
    Optional<?> explicitZkConnectionString = LocationUtil.getZkConnectionStringOverride(location);
    Optional<?> explicitHosts = LocationUtil.getHostOverride(location);

    // Set the ZooKeeper connection string if it is present in the config and not explicitly set in the location
    if (!explicitZkConnectionString.isPresent()) {
        String zkConnectionString = conf.get(ConfigurationParameters.ZOOKEEPER_CONNECTION_STRING_PARAM);
        if (zkConnectionString != null) {
            location = LocationUtil.setZkConnectionStringOverride(location, zkConnectionString);
        }
    }

    // Set the hosts if they are present in the config and not explicitly set in the location
    if (!explicitHosts.isPresent()) {
        String hosts = conf.get(ConfigurationParameters.HOSTS_PARAM);
        if (hosts != null) {
            // The configuration value is a comma-separated list of hosts
            location = LocationUtil.setHostsOverride(location, hosts.split(","));
        }
    }

    // The file system URI always points at the root, regardless of the path in the supplied location
    _uri = UriBuilder.fromUri(location).replacePath("/").build();
    _apiKey = conf.get(ConfigurationParameters.EMO_API_KEY);
    _rootPath = new Path(_uri);
    _splitSize = BaseInputFormat.getSplitSize(conf);
}
/**
 * Returns the URI identifying this file system.
 *
 * @return the URI stored by {@code initialize(URI, Configuration)}
 */
@Override
public URI getUri() {
    return this._uri;
}
/**
 * Lists the entries directly beneath the given path.
 *
 * <p>For the root path one status is returned per table reported by the data store.  For a table path one
 * simulated file status is returned per split of that table, each named after the split with a
 * {@code ".gz"} suffix.  Split paths themselves cannot be listed.
 *
 * @param path the root path or a table path
 * @return the statuses of the tables (for root) or of the table's splits
 * @throws IOException if {@code path} refers to a split, or if closing the data store fails
 */
@Override
public FileStatus[] listStatus(Path path)
        throws IOException {
    if (path.equals(_rootPath)) {
        // Root path.  List all tables as subdirectories.
        try (CloseableDataStore dataStore = HadoopDataStoreManager.getInstance().getDataStore(_uri, _apiKey, _metricRegistry)) {
            // toArray() is called inside the try block, so the lazily transformed table listing is fully
            // consumed before the data store is closed.
            return FluentIterable
                    .from(DataStoreStreaming.listTables(dataStore))
                    .transform(new Function<Table, FileStatus>() {
                        @Override
                        public FileStatus apply(Table table) {
                            return getTableFileStatus(_rootPath, table.getName());
                        }
                    })
                    .toArray(FileStatus.class);
        }
    }

    // Other than root only tables can be listed
    if (getSplitName(_rootPath, path) != null) {
        throw new IOException("Cannot list a split");
    }

    final String table = getTableName(_rootPath, path);

    // Simulate a file for each split
    Collection<String> splits = getSplitsFromDataStore(table);
    return FluentIterable.from(splits)
            .transform(new Function<String, FileStatus>() {
                @Override
                public FileStatus apply(String split) {
                    // Split length has no meaning, use max value to make it appear large since actual size is unknown
                    return getSplitFileStatus(_rootPath, table, split + ".gz", Long.MAX_VALUE, 1024);
                }
            })
            .toArray(FileStatus.class);
}
/**
 * Returns the status of the root, a table directory or a split file.
 *
 * @param path the path to describe
 * @return the root status for the root path, a table status when the path names no split (even if the
 *         table does not exist), otherwise a split status
 * @throws IOException declared by the overridden method
 */
@Override
public FileStatus getFileStatus(Path path)
        throws IOException {
    final boolean isRoot = path.equals(_rootPath);
    if (isRoot) {
        return getRootFileStatus(_rootPath);
    }

    final String tableName = getTableName(_rootPath, path);
    final String splitName = getSplitName(_rootPath, path);

    if (splitName != null) {
        // The path names a split.  Its real size is unknown, so max long is reported as the length.
        final String splitFileName = splitAsGzipFile(splitName);
        return getSplitFileStatus(_rootPath, tableName, splitFileName, Long.MAX_VALUE, 1024);
    }

    // No split component, so the path names a table.  A status is returned whether or not the table exists.
    return getTableFileStatus(_rootPath, tableName);
}
/**
 * Returns one input split path per split of the table named by {@code path}.
 *
 * <p>The splits are requested from the data store using the split size captured at initialization; the
 * {@code config} and {@code splitSize} arguments are not consulted by this implementation.
 *
 * @param config    the job configuration (unused here)
 * @param path      the table path whose splits are requested
 * @param splitSize the requested split size (unused here)
 * @return an immutable list with one {@code SplitPath} per split
 * @throws IOException declared by the overridden method
 */
@Override
public List<SplitPath> getInputSplits(Configuration config, Path path, int splitSize)
        throws IOException {
    String table = getTableName(_rootPath, path);
    ImmutableList.Builder<SplitPath> splits = ImmutableList.builder();

    Collection<String> sourceSplits = getSplitsFromDataStore(table);
    for (String split : sourceSplits) {
        // Length is undefined and unused, use 1 for a simple positive value
        splits.add(new SplitPath(getSplitPath(_rootPath, table, splitAsGzipFile(split)), 1));
    }

    return splits.build();
}
/**
 * Fetches the split names for a table from the data store, using the split size captured at initialization.
 *
 * @param table the name of the table to split
 * @return the split names reported by the data store, or a single placeholder "empty split" name when the
 *         table is unknown
 */
private Collection<String> getSplitsFromDataStore(String table) {
    try (CloseableDataStore dataStore = HadoopDataStoreManager.getInstance().getDataStore(_uri, _apiKey, _metricRegistry)) {
        return dataStore.getSplits(table, _splitSize);
    } catch (Exception e) {
        // An unknown table anywhere in the causal chain is not treated as an error: a single placeholder
        // split that represents "no data" is returned in its place.
        if (Iterables.any(Throwables.getCausalChain(e), Predicates.instanceOf(UnknownTableException.class))) {
            return ImmutableList.of(getEmptySplitFileName());
        }
        // Any other failure is rethrown, wrapped if it is a checked exception
        throw Throwables.propagate(e);
    }
}
@Override
public BaseRecordReader getBaseRecordReader(Configuration config, Path path, int splitSize)
throws IOException {
if (isEmptySplit(path)) {
return getEmptySplitRecordReader();
}
final String table = getTableName(_rootPath, path);
final String splitFile = getSplitName(_rootPath, path);
final String split = splitNameWithoutGzipExtension(splitFile);
final URI location = LocationUtil.toLocation(_uri, table);
return new BaseRecordReader(splitSize) {
private CloseableDataStore _dataStore;
@Override
protected Iterator