
// org.apache.drill.yarn.client.FileUploader Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.yarn.client;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.drill.yarn.core.DfsFacade;
import org.apache.drill.yarn.core.DoYUtil;
import org.apache.drill.yarn.core.DoyConfigException;
import org.apache.drill.yarn.core.DrillOnYarnConfig;
import org.apache.drill.yarn.core.DfsFacade.DfsFacadeException;
import org.apache.drill.yarn.core.DfsFacade.Localizer;
import org.apache.hadoop.yarn.api.records.LocalResource;
import com.typesafe.config.Config;
/**
* Performs the file upload portion of the operation by uploading an archive to
* the target DFS system and directory. Records the uploaded archive so it may
* be used for localizing Drill in the launch step.
*
* Some of the code is a bit of a dance so we can get information early to
* display in status messages.
*
* This class handles four cases:
*
* - Non-localized, config in $DRILL_HOME/conf.
* - Non-localized, config in a site directory.
* - Localized, config in $DRILL_HOME.
* - Localized, config in a site directory.
*
*
* The non-localized case adds complexity, but is very handy when doing
* development as it avoids the wait for the archives to up- and down-load. The
* non-localized mode is not advertised to users as it defeats one of the main
* benefits of YARN.
*
* In the localized case, YARN is incomplete; there is no API to inform the AM
* of the set of localized files, so we pass the information along in
* environment variables. Also, tar is a bit annoying because it includes the
* root directory name when unpacking, so that the drill.tar.gz archive unpacks
* to, say, apache-drill.x.y.z. So, we must pass along the directory name as
* well.
*
* All of this is further complicated by the way YARN needs detailed information
* to localize resources, and that YARN uses a "key" to identify localized
* resources, which becomes the directory name in the task's working folder.
* Thus, Drill becomes, say
* $PWD/drill/apache-drill.x.y.z/bin, conf, ...
* YARN provides PWD. The Drillbit launch script needs to know the next two
* directory names.
*
* For efficiency, we omit uploading the Drill archive if one already exists in
* dfs and is the same size as the one on the client. We always upload the
* config archive (if needed) because config changes are likely to be one reason
* that someone (re)starts the Drill cluster.
*/
public abstract class FileUploader {
// Drill-on-YARN configuration wrapper; set in the constructor from
// DrillOnYarnConfig.instance().
protected DrillOnYarnConfig doyConfig;
// Underlying configuration, taken from doyConfig.getConfig() in the constructor.
protected Config config;
// DFS facade; assigned (and connected) by connectToDfs(). It has no initializer,
// so it is null until that method runs.
protected DfsFacade dfs;
// Constructor flag: when set, setup() prints "Dry run only." and the upload
// steps only describe what they would do.
protected boolean dryRun;
// Constructor flag: when set (or on a dry run), run() dumps the collected state.
protected boolean verbose;
// Local Drill home; filled in by setup() from doyConfig.getLocalDrillHome().
protected File localDrillHome;
// Local site directory; filled in by setup(). Null means no site directory
// (see hasSiteDir()).
protected File localSiteDir;
// Local Drill archive file; resolved by makeDrillLocalizer().
protected File localDrillArchivePath;
// Resource map handed to Localizer.defineResources() together with the
// configured resource key.
// NOTE(review): declared as a raw Map; the otherwise unused LocalResource
// import suggests the intended type is Map<String, LocalResource> -- confirm
// against DfsFacade.Localizer.defineResources before tightening the type.
public Map resources = new HashMap<>();
// Destination path of the Drill archive, as reported by the Drill localizer.
public String drillArchivePath;
// Destination path of the site archive, as reported by the site localizer.
public String siteArchivePath;
// Remote Drill home; filled in by setup() from doyConfig.getRemoteDrillHome().
public String remoteDrillHome;
// Remote site directory; filled in by setup() only when a site directory exists.
public String remoteSiteDir;
/**
 * Uploader variant for the non-localized case. Nothing is sent to DFS; the
 * class runs the common setup and prints a warning for each remote location
 * property (Drill home, site directory) that is blank in the configuration.
 */
public static class NonLocalized extends FileUploader {

  public NonLocalized(boolean dryRun, boolean verbose) {
    super(dryRun, verbose);
  }

  /**
   * Runs the common setup, checks the Drill home property and, when a site
   * directory is in use, the site directory property. Dumps the collected
   * state in verbose or dry-run mode.
   *
   * @throws ClientException if the common setup fails
   */
  @Override
  public void run() throws ClientException {
    setup();
    prepareDrillHome();
    if (hasSiteDir()) {
      prepareSiteDir();
    }
    final boolean showDetails = verbose || dryRun;
    if (showDetails) {
      dump(System.out);
    }
  }

  /**
   * Warns when the Drill home property is blank. The client can work out
   * the Drill home on its own, but the AM has to be told.
   */
  private void prepareDrillHome() throws ClientException {
    warnIfBlank(DrillOnYarnConfig.DRILL_HOME,
        "Assuming remote Drill home is the same as the local location: ",
        localDrillHome);
  }

  /**
   * Warns when the site directory property is blank.
   */
  private void prepareSiteDir() throws ClientException {
    warnIfBlank(DrillOnYarnConfig.SITE_DIR,
        "Assuming remote Drill site is the same as the local location: ",
        localSiteDir);
  }

  /**
   * Prints the two-line "not set" warning when the given property is blank.
   *
   * @param propName configuration property to inspect
   * @param assumption text printed in front of the local path
   * @param localLocation local directory named in the warning
   */
  private void warnIfBlank(String propName, String assumption,
      File localLocation) {
    final String propValue = config.getString(propName);
    if (!DoYUtil.isBlank(propValue)) {
      return;
    }
    System.out.println("Warning: non-localized run "
        + propName + " is not set.");
    System.out.println(assumption + localLocation.getAbsolutePath());
  }
}
/**
 * Uploader variant that uploads nothing and instead reuses archives that are
 * already present in DFS. It verifies that the expected archives exist and
 * registers them in the resource map.
 */
public static class ReuseFiles extends FileUploader {

  public ReuseFiles(boolean dryRun, boolean verbose) {
    super(dryRun, verbose);
  }

  /**
   * Runs the common setup, checks the Drill archive and, when a site
   * directory is in use, the site archive. Dumps the collected state in
   * verbose or dry-run mode.
   *
   * @throws ClientException if setup fails, DFS cannot be reached, or an
   *           expected archive is missing from DFS
   */
  @Override
  public void run() throws ClientException {
    setup();
    checkDrillArchive();
    if (hasSiteDir()) {
      checkSiteArchive();
    }
    if (verbose || dryRun) {
      dump(System.out);
    }
  }

  /**
   * Verify that the Drill archive already exists in DFS and register it as a
   * resource. Nothing is uploaded. If the local archive is also present, it
   * is compared with the DFS copy and a warning is printed on a mismatch.
   *
   * @throws ClientException if DFS cannot be reached, the existence check
   *           fails, or the archive is not in DFS
   */
  private void checkDrillArchive() throws ClientException {
    // Connect before building the localizer: the localizer is handed the
    // dfs reference at construction time, so creating it first would give
    // it a null facade. This matches the order used when uploading.
    connectToDfs();
    DfsFacade.Localizer localizer = makeDrillLocalizer();
    try {
      if (!localizer.destExists()) {
        throw new ClientException(
            "Drill archive not found in DFS: " + drillArchivePath);
      }
    } catch (IOException e) {
      throw new ClientException(
          "Failed to check existence of " + drillArchivePath, e);
    }

    // The local archive is optional when reusing files. Its absence only
    // means the comparison is skipped; the DFS archive must still be
    // registered, otherwise the resource map would lack the Drill archive.
    if (localDrillArchivePath.exists() && !localizer.filesMatch()) {
      System.out.println(
          "Warning: Drill archive on DFS does not match the local version.");
    }
    defineResources(localizer, DrillOnYarnConfig.DRILL_ARCHIVE_KEY);
  }

  /**
   * Verify that the site archive already exists in DFS and register it as a
   * resource. Relies on the DFS connection made by checkDrillArchive().
   *
   * @throws ClientException if the existence check fails or the site
   *           archive is not in DFS
   */
  private void checkSiteArchive() throws ClientException {
    // No local archive file is involved when reusing the site archive.
    DfsFacade.Localizer localizer = makeSiteLocalizer(null);
    try {
      if (!localizer.destExists()) {
        throw new ClientException(
            "Site archive not found in DFS: " + siteArchivePath);
      }
    } catch (IOException e) {
      throw new ClientException(
          "Failed to check existence of " + siteArchivePath, e);
    }
    defineResources(localizer, DrillOnYarnConfig.SITE_ARCHIVE_KEY);
  }
}
public static class UploadFiles extends FileUploader {
private boolean force;
public UploadFiles(boolean force, boolean dryRun, boolean verbose) {
super(dryRun, verbose);
this.force = force;
}
@Override
public void run() throws ClientException {
setup();
uploadDrillArchive();
if (hasSiteDir()) {
uploadSite();
}
if (verbose || dryRun) {
dump(System.out);
}
}
/**
* Create a temporary archive of the site directory and upload it to DFS. We
* always upload the site; we never reuse an existing one.
*
* @throws ClientException
*/
private void uploadSite() throws ClientException {
File siteArchive = createSiteArchive();
try {
uploadSiteArchive(siteArchive);
} finally {
siteArchive.delete();
}
}
/**
* Upload the Drill archive if desired. Skip the upload if the file already
* exists in dfs and is the same size as the local file. However using the
* force option can force an upload even if the sizes match.
*
* Prepares the information needed to tell YARN and the AM about the
* localized archive.
*
* Note that the Drill archive is not created by this client; it must
* already exist on disk. Typically, it is just the archive downloaded from
* Apache or some other distribution. The uploaded archive retains the name
* of the archive in the client, which may be useful to check the version of
* the uploaded code based on the file name.
*
* @throws ClientException
*/
private void uploadDrillArchive() throws ClientException {
// Print the progress message here because doing the connect takes
// a while and the message makes it look like we're doing something.
connectToDfs();
DfsFacade.Localizer localizer = makeDrillLocalizer();
boolean needsUpload = force || !localizer.filesMatch();
if (needsUpload) {
// Thoroughly check the Drill archive. Errors with the archive seem a
// likely source of confusion, so provide detailed error messages for
// common cases. Don't bother with these checks if no upload is needed.
if (!localDrillArchivePath.exists()) {
throw new ClientException(
"Drill archive not found: " + localDrillArchivePath.getAbsolutePath());
}
if (!localDrillArchivePath.canRead()) {
throw new ClientException(
"Drill archive is not readable: " + localDrillArchivePath.getAbsolutePath());
}
if (localDrillArchivePath.isDirectory()) {
throw new ClientException(
"Drill archive cannot be a directory: " + localDrillArchivePath.getAbsolutePath());
}
}
drillArchivePath = localizer.getDestPath();
if (needsUpload) {
if (dryRun) {
System.out.print(
"Upload " + localDrillArchivePath.getAbsolutePath() + " to " + drillArchivePath);
} else {
System.out.print("Uploading " + localDrillArchivePath.getAbsolutePath() + " to "
+ drillArchivePath + " ... ");
upload(localizer);
}
} else {
System.out.println(
"Using existing Drill archive in DFS: " + drillArchivePath);
}
defineResources(localizer, DrillOnYarnConfig.DRILL_ARCHIVE_KEY);
}
/**
* Run the tar command to archive the site directory into a temporary
* archive which is then uploaded to DFS using a standardized name. The site
* directory is always uploaded since configuration is subject to frequent
* changes.
*
* @return
* @throws ClientException
*/
private File createSiteArchive() throws ClientException {
File siteArchiveFile;
try {
siteArchiveFile = File.createTempFile("drill-site-", ".tar.gz");
} catch (IOException e) {
throw new ClientException("Failed to create site archive temp file", e);
}
String cmd[] = new String[] { "tar", "-C", localSiteDir.getAbsolutePath(),
"-czf", siteArchiveFile.getAbsolutePath(), "." };
List cmdList = Arrays.asList(cmd);
String cmdLine = DoYUtil.join(" ", cmdList);
if (dryRun) {
System.out.print("Site archive command: ");
System.out.println(cmdLine);
return siteArchiveFile;
}
ProcessBuilder builder = new ProcessBuilder(cmdList);
builder.redirectErrorStream(true);
Process proc;
try {
proc = builder.start();
} catch (IOException e) {
throw new ClientException("Failed to launch tar process: " + cmdLine,
e);
}
// Should not be much output. But, we have to read it anyway to avoid
// blocking. We'll use the output if we encounter an error.
BufferedReader br = new BufferedReader(
new InputStreamReader(proc.getInputStream()));
StringBuilder buf = new StringBuilder();
try {
String line;
while ((line = br.readLine()) != null) {
buf.append(line);
buf.append("\n");
}
br.close();
} catch (IOException e) {
throw new ClientException("Failed to read output from tar command", e);
}
try {
proc.waitFor();
} catch (InterruptedException e) {
// Won't occur.
}
if (proc.exitValue() != 0) {
String msg = buf.toString().trim();
throw new ClientException("Tar of site directory failed: " + msg);
}
return siteArchiveFile;
}
/**
* Upload the site archive. For debugging, the client provides the option to
* use existing files, which users should not do in production.
*
* @param siteArchive
* @throws ClientException
*/
private void uploadSiteArchive(File siteArchive) throws ClientException {
DfsFacade.Localizer localizer = makeSiteLocalizer(siteArchive);
if (dryRun) {
System.out.println("Upload site archive to " + siteArchivePath);
} else {
System.out
.print("Uploading site directory " + localSiteDir.getAbsolutePath() +
" to " + siteArchivePath + " ... ");
upload(localizer);
}
defineResources(localizer, DrillOnYarnConfig.SITE_ARCHIVE_KEY);
}
}
/**
 * Captures the run options and looks up the shared Drill-on-YARN
 * configuration.
 *
 * @param dryRun whether this is a dry run
 * @param verbose whether the collected state should be dumped after the run
 */
public FileUploader(boolean dryRun, boolean verbose) {
  this.dryRun = dryRun;
  this.verbose = verbose;
  this.doyConfig = DrillOnYarnConfig.instance();
  this.config = this.doyConfig.getConfig();
}
/**
 * Performs this uploader's work. Each nested subclass in this file supplies
 * its own implementation, and each of them starts by calling setup().
 *
 * @throws ClientException if the operation fails
 */
public abstract void run() throws ClientException;
/**
 * Shared preparation step: resolves the local and remote Drill home and, if
 * a site directory is configured, validates it and resolves its remote
 * counterpart. Announces a dry run when that option is selected.
 *
 * @throws ClientException if the remote Drill home cannot be determined or
 *           the site directory is not a directory
 */
protected void setup() throws ClientException {
  // Drill home: local first, then remote.
  localDrillHome = doyConfig.getLocalDrillHome();
  try {
    remoteDrillHome = doyConfig.getRemoteDrillHome();
  } catch (DoyConfigException configError) {
    throw new ClientException(configError);
  }

  // The site directory is optional; when given it must be a real directory.
  localSiteDir = doyConfig.getLocalSiteDir();
  if (hasSiteDir()) {
    final boolean siteIsDirectory = localSiteDir.isDirectory();
    if (siteIsDirectory) {
      remoteSiteDir = doyConfig.getRemoteSiteDir();
    } else {
      throw new ClientException(
          "Drill site dir not a directory: " + localSiteDir);
    }
  }

  // Make it clear up front that nothing will actually be changed.
  if (dryRun) {
    System.out.println("Dry run only.");
  }
}
/**
 * Tells whether a local site directory has been recorded.
 *
 * @return true when the local site directory field is non-null
 */
public boolean hasSiteDir() {
  final boolean siteDirKnown = (localSiteDir != null);
  return siteDirKnown;
}
/**
 * Reads the localize-Drill option from the configuration: whether Drill
 * files are to be localized (uploaded) rather than taken from files already
 * on the worker nodes.
 *
 * @return the boolean value of the localize-Drill configuration property
 */
public boolean isLocalized() {
  final boolean localize = config.getBoolean(DrillOnYarnConfig.LOCALIZE_DRILL);
  return localize;
}
/**
 * Creates the DFS facade from the configuration and connects it, printing a
 * progress line that ends with either " Connected." or "Failed.".
 *
 * @throws ClientException if the facade reports a failure
 */
protected void connectToDfs() throws ClientException {
  System.out.print("Connecting to DFS...");
  try {
    dfs = new DfsFacade(config);
    dfs.connect();
  } catch (DfsFacadeException connectError) {
    System.out.println("Failed.");
    throw new ClientException("Failed to connect to DFS", connectError);
  }
  System.out.println(" Connected.");
}
/**
 * Resolves the local Drill archive file from the configuration, builds a
 * localizer for it and records the localizer's destination path.
 *
 * @return the localizer for the Drill archive
 * @throws ClientException if the archive path property is blank
 */
protected Localizer makeDrillLocalizer() throws ClientException {
  final String configuredPath = config
      .getString(DrillOnYarnConfig.DRILL_ARCHIVE_PATH);
  if (DoYUtil.isBlank(configuredPath)) {
    throw new ClientException("Drill archive path ("
        + DrillOnYarnConfig.DRILL_ARCHIVE_PATH + ") is not set.");
  }

  // A relative archive path is taken to be relative to the local Drill home.
  localDrillArchivePath = new File(configuredPath);
  final boolean relative = !localDrillArchivePath.isAbsolute();
  if (relative) {
    final File drillHome = DrillOnYarnConfig.instance().getLocalDrillHome();
    localDrillArchivePath = new File(drillHome, configuredPath);
  }

  final Localizer drillLocalizer = new Localizer(dfs,
      localDrillArchivePath, "Drill");
  drillArchivePath = drillLocalizer.getDestPath();
  return drillLocalizer;
}
/**
 * Builds a localizer for the site archive under the standard site archive
 * name and records the localizer's destination path.
 *
 * @param siteArchive the local site archive file; one caller in this file
 *          passes null when only the DFS copy is of interest
 * @return the localizer for the site archive
 */
protected Localizer makeSiteLocalizer(File siteArchive) {
  final Localizer siteLocalizer = new Localizer(dfs, siteArchive,
      DrillOnYarnConfig.SITE_ARCHIVE_NAME, "Site");
  siteArchivePath = siteLocalizer.getDestPath();
  return siteLocalizer;
}
/**
 * Asks the localizer to upload its archive and finishes the progress line
 * with "Uploaded." or "Failed.".
 *
 * @param localizer the localizer whose archive is uploaded
 * @throws ClientException if the localizer reports an upload failure
 */
protected void upload(Localizer localizer) throws ClientException {
  try {
    localizer.upload();
    System.out.println("Uploaded.");
  } catch (DfsFacadeException uploadError) {
    System.out.println("Failed.");
    final String message = "Failed to upload " + localizer.getLabel() + " archive";
    throw new ClientException(message, uploadError);
  }
}
/**
 * Looks up the resource key named by the given property and has the
 * localizer add its entries to the resource map under that key.
 *
 * @param localizer the localizer that defines the resources
 * @param keyProp configuration property holding the resource key
 * @throws ClientException if the localizer reports a failure
 */
protected void defineResources(Localizer localizer, String keyProp)
    throws ClientException {
  final String resourceKey = config.getString(keyProp);
  try {
    localizer.defineResources(resources, resourceKey);
  } catch (DfsFacadeException statusError) {
    final String message =
        "Failed to get DFS status for " + localizer.getLabel() + " archive";
    throw new ClientException(message, statusError);
  }
}
/**
 * Writes a summary of the collected state: the localization and site
 * directory flags, the Drill home locations, the site directory locations
 * when present, and the DFS archive paths when localized.
 *
 * @param out the stream to write to
 */
protected void dump(PrintStream out) {
  out.print("Localized: ");
  final boolean localized = isLocalized();
  out.println(yesNo(localized));

  out.print("Has Site Dir: ");
  final boolean withSiteDir = hasSiteDir();
  out.println(yesNo(withSiteDir));

  out.print("Local Drill home: ");
  out.println(localDrillHome.getAbsolutePath());
  out.print("Remote Drill home: ");
  out.println(remoteDrillHome);

  if (withSiteDir) {
    out.print("Local Site dir: ");
    out.println(localSiteDir.getAbsolutePath());
    out.print("Remote Site dir: ");
    out.println(remoteSiteDir);
  }

  // The DFS paths only apply to a localized run.
  if (!localized) {
    return;
  }
  out.print("Drill archive DFS path: ");
  out.println(drillArchivePath);
  if (withSiteDir) {
    out.print("Site archive DFS path: ");
    out.println(siteArchivePath);
  }
}

/** Renders a flag as "Yes" or "No" for the dump output. */
private static String yesNo(boolean flag) {
  if (flag) {
    return "Yes";
  }
  return "No";
}
}