/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.fs.maprfs;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.flink.core.fs.BlockLocation;
import org.apache.flink.core.fs.FSDataInputStream;
import org.apache.flink.core.fs.FSDataOutputStream;
import org.apache.flink.core.fs.FileStatus;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;
import org.apache.flink.runtime.fs.hdfs.HadoopBlockLocation;
import org.apache.flink.runtime.fs.hdfs.HadoopDataInputStream;
import org.apache.flink.runtime.fs.hdfs.HadoopDataOutputStream;
import org.apache.flink.runtime.fs.hdfs.HadoopFileStatus;
/**
 * Concrete implementation of the {@link FileSystem} base class for the MapR
 * file system. The class contains MapR-specific code to initialize the
 * connection to the file system. Apart from that, the code mainly reuses the
 * existing HDFS wrapper code.
*/
public final class MapRFileSystem extends FileSystem {
/**
* The log object used for debugging.
*/
private static final Logger LOG = LoggerFactory.getLogger(MapRFileSystem.class);
/**
* The name of MapR's class containing the implementation of the Hadoop HDFS
* interface.
*/
private static final String MAPR_FS_IMPL_CLASS = "com.mapr.fs.MapRFileSystem";
/**
* Name of the environment variable to determine the location of the MapR
* installation.
*/
private static final String MAPR_HOME_ENV = "MAPR_HOME";
/**
* The default location of the MapR installation.
*/
private static final String DEFAULT_MAPR_HOME = "/opt/mapr/";
/**
 * The path, relative to MAPR_HOME, of the file in which MapR stores how to
 * access the configured clusters.
*/
private static final String MAPR_CLUSTER_CONF_FILE = "/conf/mapr-clusters.conf";
/**
* A Hadoop configuration object used during the file system initialization.
*/
private final org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
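// Note: Hadoop's default Configuration constructor loads core-default.xml
// and core-site.xml from the classpath, so site-specific Hadoop settings
// configured there apply to this file system as well.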
/**
* The MapR class containing the implementation of the Hadoop HDFS
* interface.
*/
private final Class<? extends org.apache.hadoop.fs.FileSystem> fsClass;
/**
* The MapR implementation of the Hadoop HDFS interface.
*/
private org.apache.hadoop.fs.FileSystem fs;
/**
* Creates a new MapRFileSystem object to access the MapR file system.
*
* @throws IOException
 * thrown if the required MapR classes cannot be found
*/
@SuppressWarnings("unchecked")
public MapRFileSystem() throws IOException {
if (LOG.isDebugEnabled()) {
LOG.debug(String.format(
"Trying to load class %s to access the MapR file system",
MAPR_FS_IMPL_CLASS));
}
try {
this.fsClass = (Class<? extends org.apache.hadoop.fs.FileSystem>) Class
.forName(MAPR_FS_IMPL_CLASS);
} catch (Exception e) {
throw new IOException(
String.format(
"Cannot find class %s, probably the runtime was not compiled against the MapR Hadoop libraries",
MAPR_FS_IMPL_CLASS), e);
}
}
@Override
public Path getWorkingDirectory() {
return new Path(this.fs.getWorkingDirectory().toUri());
}
@Override
public Path getHomeDirectory() {
return new Path(this.fs.getHomeDirectory().toUri());
}
@Override
public URI getUri() {
return this.fs.getUri();
}
@Override
public void initialize(final URI path) throws IOException {
if (LOG.isInfoEnabled()) {
LOG.info(String.format("Initializing MapR file system for path %s",
path.toString()));
}
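// In a URI such as maprfs://my.cluster.com/some/path (illustrative
// example), the authority ("my.cluster.com") names the MapR cluster
// to connect to; without an authority, the default cluster is used.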
final String authority = path.getAuthority();
if (authority == null || authority.isEmpty()) {
// Use the default constructor to instantiate MapR file system
// object
try {
this.fs = this.fsClass.newInstance();
} catch (Exception e) {
throw new IOException(e);
}
} else {
// We have an authority, check the MapR cluster configuration to
// find the CLDB locations.
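// (The CLDB, or Container Location Database, is MapR's metadata
// service; clients contact it to locate file data in the cluster.)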
final String[] cldbLocations = getCLDBLocations(authority);
// Find the appropriate constructor
final Constructor<? extends org.apache.hadoop.fs.FileSystem> constructor;
try {
constructor = this.fsClass.getConstructor(String.class,
String[].class);
} catch (NoSuchMethodException e) {
throw new IOException(e);
}
// Instantiate the file system object
try {
this.fs = constructor.newInstance(authority, cldbLocations);
} catch (Exception e) {
throw new IOException(e);
}
}
this.fs.initialize(path, this.conf);
}
/**
 * Retrieves the CLDB locations for the given MapR cluster name.
*
* @param authority
* the name of the MapR cluster
* @return a list of CLDB locations
* @throws IOException
* thrown if the CLDB locations for the given MapR cluster name
* cannot be determined
*/
private static String[] getCLDBLocations(final String authority)
throws IOException {
// Determine the MapR home
String maprHome = System.getenv(MAPR_HOME_ENV);
if (maprHome == null) {
maprHome = DEFAULT_MAPR_HOME;
}
final File maprClusterConf = new File(maprHome, MAPR_CLUSTER_CONF_FILE);
if (LOG.isDebugEnabled()) {
LOG.debug(String.format(
"Trying to retrieve MapR cluster configuration from %s",
maprClusterConf));
}
// Read the cluster configuration file, format is specified at
// http://doc.mapr.com/display/MapR/mapr-clusters.conf
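// A line typically looks like this (illustrative example):
//   demo.mapr.com secure=true node1:7222 node2:7222
// i.e. the cluster name, optional key=value flags, and the CLDB hosts.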
BufferedReader br = null;
try {
br = new BufferedReader(new FileReader(maprClusterConf));
String line;
while ((line = br.readLine()) != null) {
// Normalize the string
line = line.trim();
line = line.replace('\t', ' ');
final String[] fields = line.split(" ");
// String.split never returns null; skip lines without any fields
if (fields.length < 1) {
continue;
}
final String clusterName = fields[0];
if (!clusterName.equals(authority)) {
continue;
}
final List<String> cldbLocations = new ArrayList<String>();
for (int i = 1; i < fields.length; ++i) {
// Make sure this is not a key-value pair MapR recently
// introduced in the file format along with their security
// features.
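// e.g. a field such as "secure=true" is a flag, not a CLDB host.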
if (!fields[i].isEmpty() && !fields[i].contains("=")) {
cldbLocations.add(fields[i]);
}
}
if (cldbLocations.isEmpty()) {
throw new IOException(
String.format(
"%s contains entry for cluster %s but no CLDB locations.",
maprClusterConf, authority));
}
return cldbLocations.toArray(new String[0]);
}
} finally {
if (br != null) {
br.close();
}
}
throw new IOException(String.format(
"Unable to find CLDB locations for cluster %s", authority));
}
@Override
public FileStatus getFileStatus(final Path f) throws IOException {
final org.apache.hadoop.fs.FileStatus status = this.fs
.getFileStatus(new org.apache.hadoop.fs.Path(f.toString()));
return new HadoopFileStatus(status);
}
@Override
public BlockLocation[] getFileBlockLocations(final FileStatus file,
final long start, final long len) throws IOException {
if (!(file instanceof HadoopFileStatus)) {
throw new IOException(
"file is not an instance of DistributedFileStatus");
}
final HadoopFileStatus f = (HadoopFileStatus) file;
final org.apache.hadoop.fs.BlockLocation[] blkLocations = fs
.getFileBlockLocations(f.getInternalFileStatus(), start, len);
// Wrap up HDFS specific block location objects
final HadoopBlockLocation[] distBlkLocations = new HadoopBlockLocation[blkLocations.length];
for (int i = 0; i < distBlkLocations.length; i++) {
distBlkLocations[i] = new HadoopBlockLocation(blkLocations[i]);
}
return distBlkLocations;
}
@Override
public FSDataInputStream open(final Path f, final int bufferSize)
throws IOException {
final org.apache.hadoop.fs.Path path = new org.apache.hadoop.fs.Path(f.toString());
final org.apache.hadoop.fs.FSDataInputStream fdis = this.fs.open(path, bufferSize);
return new HadoopDataInputStream(fdis);
}
@Override
public FSDataInputStream open(final Path f) throws IOException {
final org.apache.hadoop.fs.Path path = new org.apache.hadoop.fs.Path(f.toString());
final org.apache.hadoop.fs.FSDataInputStream fdis = this.fs.open(path);
return new HadoopDataInputStream(fdis);
}
@Override
public FSDataOutputStream create(final Path f, final boolean overwrite,
final int bufferSize, final short replication, final long blockSize)
throws IOException {
final org.apache.hadoop.fs.FSDataOutputStream fdos = this.fs.create(
new org.apache.hadoop.fs.Path(f.toString()), overwrite,
bufferSize, replication, blockSize);
return new HadoopDataOutputStream(fdos);
}
@Override
public FSDataOutputStream create(final Path f, final boolean overwrite)
throws IOException {
final org.apache.hadoop.fs.FSDataOutputStream fdos = this.fs.create(
new org.apache.hadoop.fs.Path(f.toString()), overwrite);
return new HadoopDataOutputStream(fdos);
}
@Override
public boolean delete(final Path f, final boolean recursive)
throws IOException {
return this.fs.delete(new org.apache.hadoop.fs.Path(f.toString()),
recursive);
}
@Override
public FileStatus[] listStatus(final Path f) throws IOException {
final org.apache.hadoop.fs.FileStatus[] hadoopFiles = this.fs
.listStatus(new org.apache.hadoop.fs.Path(f.toString()));
final FileStatus[] files = new FileStatus[hadoopFiles.length];
// Convert types
for (int i = 0; i < files.length; i++) {
files[i] = new HadoopFileStatus(hadoopFiles[i]);
}
return files;
}
@Override
public boolean mkdirs(final Path f) throws IOException {
return this.fs.mkdirs(new org.apache.hadoop.fs.Path(f.toString()));
}
@Override
public boolean rename(final Path src, final Path dst) throws IOException {
return this.fs.rename(new org.apache.hadoop.fs.Path(src.toString()),
new org.apache.hadoop.fs.Path(dst.toString()));
}
@SuppressWarnings("deprecation")
@Override
public long getDefaultBlockSize() {
return this.fs.getDefaultBlockSize();
}
@Override
public boolean isDistributedFS() {
return true;
}
}