/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.submarine.hadoop;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.zeppelin.conf.ZeppelinConfiguration;
import org.apache.zeppelin.interpreter.thrift.ParagraphInfo;
import org.apache.zeppelin.submarine.commons.SubmarineConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.ByteArrayOutputStream;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Hadoop FileSystem wrapper. Supports both secure (Kerberos) and non-secure mode.
*/
public class HdfsClient {
private static Logger LOGGER = LoggerFactory.getLogger(HdfsClient.class);
private ZeppelinConfiguration zConf = ZeppelinConfiguration.create();
private Configuration hadoopConf;
private boolean isSecurityEnabled;
private FileSystem fs;
private static Pattern REPL_PATTERN =
Pattern.compile("(\\s*)%([\\w\\.]+)(\\(.*?\\))?.*", Pattern.DOTALL);
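// For illustration (the example input is hypothetical, not from the original source): for a
// paragraph such as "  %spark.pyspark(pool=dev) print('hi')", REPL_PATTERN captures
// group(1) = "  " (leading whitespace), group(2) = "spark.pyspark" (interpreter name) and
// group(3) = "(pool=dev)" (optional local properties); parseText() below uses these groups
// to strip the directive and return only the script body.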
public HdfsClient(Properties properties) {
String krb5conf = properties.getProperty(SubmarineConstants.SUBMARINE_HADOOP_KRB5_CONF, "");
if (!StringUtils.isEmpty(krb5conf)) {
System.setProperty("java.security.krb5.conf", krb5conf);
}
this.hadoopConf = new Configuration();
// disable checksum for the local file system, because interpreter.json may be updated by a
// non-Hadoop filesystem API
// disable caching for file:// scheme to avoid getting LocalFS which does CRC checks
// this.hadoopConf.setBoolean("fs.file.impl.disable.cache", true);
this.hadoopConf.set("fs.file.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
// UserGroupInformation.setConfiguration(hadoopConf);
this.isSecurityEnabled = UserGroupInformation.isSecurityEnabled();
if (isSecurityEnabled) {
String keytab = properties.getProperty(
SubmarineConstants.SUBMARINE_HADOOP_KEYTAB, "");
String principal = properties.getProperty(
SubmarineConstants.SUBMARINE_HADOOP_PRINCIPAL, "");
if (StringUtils.isEmpty(keytab)) {
keytab = zConf.getString(
ZeppelinConfiguration.ConfVars.ZEPPELIN_SERVER_KERBEROS_KEYTAB);
}
if (StringUtils.isEmpty(principal)) {
principal = zConf.getString(
ZeppelinConfiguration.ConfVars.ZEPPELIN_SERVER_KERBEROS_PRINCIPAL);
}
if (StringUtils.isBlank(keytab) || StringUtils.isBlank(principal)) {
throw new RuntimeException("keytab and principal can not be empty, keytab: " + keytab
+ ", principal: " + principal);
}
try {
UserGroupInformation.loginUserFromKeytab(principal, keytab);
} catch (IOException e) {
throw new RuntimeException("Fail to login via keytab:" + keytab +
", principal:" + principal, e);
} catch (Exception e) {
LOGGER.error(e.getMessage(), e);
}
}
try {
this.fs = FileSystem.get(new URI("/"), this.hadoopConf);
} catch (IOException e) {
LOGGER.error(e.getMessage(), e);
} catch (URISyntaxException e) {
LOGGER.error(e.getMessage(), e);
}
}
public FileSystem getFs() {
return fs;
}
public Path makeQualified(Path path) {
return fs.makeQualified(path);
}
public boolean exists(final Path path) throws IOException {
return callHdfsOperation(new HdfsOperation<Boolean>() {
@Override
public Boolean call() throws IOException {
return fs.exists(path);
}
});
}
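/**
 * Ensures {@code dir} exists, creating it if necessary; throws an IOException if the
 * path already exists but is a regular file rather than a directory.
 */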
public void tryMkDir(final Path dir) throws IOException {
callHdfsOperation(new HdfsOperation<Void>() {
@Override
public Void call() throws IOException {
if (!fs.exists(dir)) {
fs.mkdirs(dir);
LOGGER.info("Create dir {} in hdfs", dir.toString());
}
if (fs.isFile(dir)) {
throw new IOException(dir.toString() + " is file instead of directory, please remove " +
"it or specify another directory");
}
fs.mkdirs(dir);
return null;
}
});
}
public List<Path> list(final Path path) throws IOException {
return callHdfsOperation(new HdfsOperation<List<Path>>() {
@Override
public List<Path> call() throws IOException {
List<Path> paths = new ArrayList<>();
for (FileStatus status : fs.globStatus(path)) {
paths.add(status.getPath());
}
return paths;
}
});
}
// recursive search path, (list folder in sub folder on demand, instead of load all
// data when zeppelin server start)
public List<Path> listAll(final Path path) throws IOException {
return callHdfsOperation(new HdfsOperation<List<Path>>() {
@Override
public List<Path> call() throws IOException {
List<Path> paths = new ArrayList<>();
collectNoteFiles(path, paths);
return paths;
}
private void collectNoteFiles(Path folder, List<Path> noteFiles) throws IOException {
FileStatus[] paths = fs.listStatus(folder);
for (FileStatus path : paths) {
if (path.isDirectory()) {
collectNoteFiles(path.getPath(), noteFiles);
} else {
if (path.getPath().getName().endsWith(".zpln")) {
noteFiles.add(path.getPath());
} else {
LOGGER.warn("Unknown file: " + path.getPath());
}
}
}
}
});
}
public boolean delete(final Path path) throws IOException {
return callHdfsOperation(new HdfsOperation<Boolean>() {
@Override
public Boolean call() throws IOException {
return fs.delete(path, true);
}
});
}
public String readFile(final Path file) throws IOException {
return callHdfsOperation(new HdfsOperation<String>() {
@Override
public String call() throws IOException {
LOGGER.debug("Read from file: " + file);
ByteArrayOutputStream noteBytes = new ByteArrayOutputStream();
IOUtils.copyBytes(fs.open(file), noteBytes, hadoopConf);
return noteBytes.toString(
zConf.getString(ZeppelinConfiguration.ConfVars.ZEPPELIN_ENCODING));
}
});
}
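/**
 * Writes {@code content} to {@code file} using the configured Zeppelin encoding. The data
 * is first written to a sibling ".tmp" file which is then renamed over the target, so a
 * partially written file is never left at the final path.
 */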
public void writeFile(final String content, final Path file)
throws IOException {
callHdfsOperation(new HdfsOperation<Void>() {
@Override
public Void call() throws IOException {
InputStream in = new ByteArrayInputStream(content.getBytes(
zConf.getString(ZeppelinConfiguration.ConfVars.ZEPPELIN_ENCODING)));
Path tmpFile = new Path(file.toString() + ".tmp");
IOUtils.copyBytes(in, fs.create(tmpFile), hadoopConf);
fs.delete(file, true);
fs.rename(tmpFile, file);
return null;
}
});
}
public void move(Path src, Path dest) throws IOException {
callHdfsOperation(() -> {
fs.rename(src, dest);
return null;
});
}
private interface HdfsOperation<T> {
T call() throws IOException;
}
private synchronized <T> T callHdfsOperation(final HdfsOperation<T> func) throws IOException {
if (isSecurityEnabled) {
try {
return UserGroupInformation.getCurrentUser().doAs(new PrivilegedExceptionAction<T>() {
@Override
public T run() throws Exception {
return func.call();
}
});
} catch (InterruptedException e) {
throw new IOException(e);
}
} else {
return func.call();
}
}
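/**
 * Strips a leading "%interpreter(key=value,...)" directive (matched by REPL_PATTERN above)
 * from a paragraph and returns only the script body; text without a directive is returned
 * trimmed as-is.
 */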
public String parseText(String text) {
String script = "", intpText = "";
// parse text to get interpreter component
if (text != null) {
Matcher matcher = REPL_PATTERN.matcher(text);
if (matcher.matches()) {
String headingSpace = matcher.group(1);
intpText = matcher.group(2);
if (matcher.groupCount() == 3 && matcher.group(3) != null) {
String localPropertiesText = matcher.group(3);
String[] splits = localPropertiesText.substring(1, localPropertiesText.length() - 1)
.split(",");
for (String split : splits) {
String[] kv = split.split("=");
if (StringUtils.isBlank(split) || kv.length == 0) {
continue;
}
if (kv.length > 2) {
throw new RuntimeException("Invalid paragraph properties format: " + split);
}
}
script = text.substring(headingSpace.length() + intpText.length() +
localPropertiesText.length() + 1).trim();
} else {
script = text.substring(headingSpace.length() + intpText.length() + 1).trim();
}
} else {
script = text.trim();
}
}
return script;
}
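/**
 * Groups the given paragraphs by title, merges the parsed scripts of paragraphs sharing a
 * title, and writes one file per title under {@code dirName/noteId} locally and/or
 * {@code hdfsUploadPath/noteId} in HDFS. Paragraphs with an empty title are skipped and
 * reported in the returned message.
 */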
public String saveParagraphToFiles(String noteId, List<ParagraphInfo> paragraphInfos,
String dirName, Properties properties)
throws Exception {
StringBuffer outputMsg = new StringBuffer();
String hdfsUploadPath = properties.getProperty(
SubmarineConstants.SUBMARINE_ALGORITHM_HDFS_PATH, "");
HashMap<String, StringBuffer> mapParagraph = new HashMap<>();
for (int i = 0; i < paragraphInfos.size(); i++) {
ParagraphInfo paragraph = paragraphInfos.get(i);
String paragraphTitle = paragraph.getParagraphTitle();
if (StringUtils.isEmpty(paragraphTitle)) {
String message = "WARN: The title of paragraph [" + i
+ "] is empty, so it was not submitted to HDFS.\n";
LOGGER.warn(message);
outputMsg.append(message);
continue;
}
if (!mapParagraph.containsKey(paragraphTitle)) {
StringBuffer mergeScript = new StringBuffer();
mapParagraph.put(paragraphTitle, mergeScript);
}
StringBuffer mergeScript = mapParagraph.get(paragraphTitle);
String paragraphText = paragraph.getParagraphText();
String text = parseText(paragraphText);
mergeScript.append(text + "\n\n");
}
// Clear all files in the local noteId directory
if (!StringUtils.isEmpty(dirName)) {
String noteDir = dirName + "/" + noteId;
File fileNoteDir = new File(noteDir);
if (fileNoteDir.exists()) {
// File.delete() cannot remove a non-empty directory, so remove its contents first
File[] children = fileNoteDir.listFiles();
if (children != null) {
for (File child : children) {
child.delete();
}
}
fileNoteDir.delete();
}
fileNoteDir.mkdirs();
}
// Clear all files in the noteid directory in HDFS
if (!StringUtils.isEmpty(hdfsUploadPath)) {
Path hdfsPath = new Path(hdfsUploadPath + "/" + noteId);
try {
if (exists(hdfsPath)) {
delete(hdfsPath);
}
tryMkDir(hdfsPath);
} catch (IOException e) {
LOGGER.error(e.getMessage(), e);
throw new Exception(e);
}
}
for (Map.Entry<String, StringBuffer> entry : mapParagraph.entrySet()) {
try {
String fileName = entry.getKey();
String fileContext = entry.getValue().toString();
String paragraphFile = dirName + "/" + noteId + "/" + fileName;
// save to local file
if (!StringUtils.isEmpty(dirName)) {
File fileParagraph = new File(paragraphFile);
if (!fileParagraph.exists()) {
fileParagraph.createNewFile();
}
// try-with-resources ensures the writer is closed even if the write fails
try (FileWriter writer = new FileWriter(paragraphFile)) {
writer.write(fileContext);
}
}
// save to hdfs
if (!StringUtils.isEmpty(hdfsUploadPath)) {
String fileDir = hdfsUploadPath + "/" + noteId + "/" + fileName;
// upload algorithm file
LOGGER.info("Commit algorithm to HDFS: {}", fileDir);
Path filePath = new Path(fileDir);
writeFile(fileContext, filePath);
}
} catch (IOException e) {
LOGGER.error(e.getMessage(), e);
throw new Exception(e);
}
}
return outputMsg.toString();
}
}
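// A minimal usage sketch (illustrative only; the paths, principal and keytab below are
// assumptions, not values taken from this class):
//
//   Properties props = new Properties();
//   props.setProperty(SubmarineConstants.SUBMARINE_HADOOP_KEYTAB, "/etc/security/zeppelin.keytab");
//   props.setProperty(SubmarineConstants.SUBMARINE_HADOOP_PRINCIPAL, "zeppelin@EXAMPLE.COM");
//   HdfsClient client = new HdfsClient(props);
//   Path dir = client.makeQualified(new Path("/user/zeppelin/submarine"));
//   client.tryMkDir(dir);
//   client.writeFile("print('hello submarine')", new Path(dir, "job.py"));
//   System.out.println(client.readFile(new Path(dir, "job.py")));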