All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hive.hplsql.Ftp Maven / Gradle / Ivy

There is a newer version: 4.0.1
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hive.hplsql;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.regex.Pattern;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.net.ftp.FTP;
import org.apache.commons.net.ftp.FTPClient;
import org.apache.commons.net.ftp.FTPFile;
import org.antlr.v4.runtime.ParserRuleContext;

public class Ftp implements Runnable {  
  String host;
  String user;
  String pwd;
  String dir;
  String targetDir;
  String filePattern;
  boolean subdir = false;
  boolean local = false;
  boolean newOnly = false;
  int sessions = 1;
  
  int fileCnt = 0;
  int dirCnt = 0;
  long ftpSizeInBytes = 0;
  
  FTPClient ftp = null;
  ConcurrentLinkedQueue filesQueue = new ConcurrentLinkedQueue();
  Hashtable filesMap = new Hashtable();
  
  AtomicInteger currentFileCnt = new AtomicInteger(1);
  AtomicInteger currentThreadCnt = new AtomicInteger(0);
  AtomicInteger fileCntSuccess = new AtomicInteger(0);
  AtomicLong bytesTransferredAll = new AtomicLong(0);

  Exec exec;
  boolean trace = false;
  boolean info = false;
  
  Ftp(Exec e) {
    exec = e;  
    trace = exec.getTrace();
    info = exec.getInfo();
  }
  
  /**
   * Run COPY FROM FTP command
   */
  Integer run(HplsqlParser.Copy_from_ftp_stmtContext ctx) {
    trace(ctx, "COPY FROM FTP");
    initOptions(ctx);
    ftp = openConnection(ctx);
    if (ftp != null) {
      Timer timer = new Timer();
      timer.start();
      if (info) {
        info(ctx, "Retrieving directory listing");
      }
      retrieveFileList(dir);
      timer.stop();
      if (info) {
        info(ctx, "Files to copy: " + Utils.formatSizeInBytes(ftpSizeInBytes) + ", " + Utils.formatCnt(fileCnt, "file") + ", " + Utils.formatCnt(dirCnt, "subdirectory", "subdirectories") + " scanned (" + timer.format() + ")");
      }
      if (fileCnt > 0) {
        copyFiles(ctx);
      }
    }  
    return 0;
  }
  
  /**
   * Copy the specified files from FTP
   */
  void copyFiles(HplsqlParser.Copy_from_ftp_stmtContext ctx) {
    Timer timer = new Timer();
    timer.start();
    if (fileCnt > 1 && sessions > 1) {
      if (sessions > fileCnt) {
        sessions = fileCnt;
      }
      try {
        Thread threads[] = new Thread[sessions];
        for (int i = 0; i < sessions; i++) {
          threads[i] = new Thread(this);
          threads[i].start();
        }
        for (int i = 0; i < sessions; i++) {
          threads[i].join(); 
        }
      }
      catch(Exception e) { 
      }
    }
    else {            // Transfer files in the single session      
      run();      
    }
    if (info) {
      long elapsed = timer.stop();
      long bytesAll = bytesTransferredAll.get();
      info(ctx, "Transfer complete: " + Utils.formatSizeInBytes(bytesAll) + ", " + fileCntSuccess.get() + " files ok, " + (fileCnt - fileCntSuccess.get()) + " failed, "+ Utils.formatTime(elapsed) + ", " + Utils.formatBytesPerSec(bytesAll, elapsed));
    } 
  }
  
  /**
   * Run a thread to transfer files
   */
  public void run() {
    byte[] data = null;
    Timer timer = new Timer();
    FTPClient ftp = this.ftp;
    if (currentThreadCnt.getAndIncrement() > 0) {
      ftp = openConnection(null);
    }    
    while(true) {
      String file = filesQueue.poll();
      if (file == null) {
        break;
      }
      int num = currentFileCnt.getAndIncrement();
      FTPFile ftpFile = filesMap.get(file);
      long ftpSizeInBytes = ftpFile.getSize(); 
      String fmtSizeInBytes = Utils.formatSizeInBytes(ftpSizeInBytes);
      String targetFile = getTargetFileName(file); 
      if (info) {
        info(null, "  " + file + " - started (" + num + " of " + fileCnt + ", " + fmtSizeInBytes +")");
      }
      try {
        InputStream in = ftp.retrieveFileStream(file);
        OutputStream out = null;
        java.io.File targetLocalFile = null;
        File targetHdfsFile = null;
        if (local) {
          targetLocalFile = new java.io.File(targetFile);
          if (!targetLocalFile.exists()) {            
            targetLocalFile.getParentFile().mkdirs();            
            targetLocalFile.createNewFile();
          }
          out = new FileOutputStream(targetLocalFile, false /*append*/);
        }
        else {
          targetHdfsFile = new File();
          out = targetHdfsFile.create(targetFile, true /*overwrite*/);
        }  
        if (data == null) {
          data = new byte[3*1024*1024];
        }
        int bytesRead = -1;
        long bytesReadAll = 0;
        long start = timer.start();
        long prev = start;
        long readTime = 0;
        long writeTime = 0;
        long cur, cur2, cur3;
        while (true) {
          cur = timer.current();
          bytesRead = in.read(data);
          cur2 = timer.current();
          readTime += (cur2 - cur); 
          if (bytesRead == -1) {
            break;
          }        
          out.write(data, 0, bytesRead);
          out.flush();
          cur3 = timer.current();
          writeTime += (cur3 - cur2); 
          bytesReadAll += bytesRead;
          if (info) {
            cur = timer.current();
            if (cur - prev > 13000) {
              long elapsed = cur - start;
              info(null, "  " + file + " - in progress (" + Utils.formatSizeInBytes(bytesReadAll) + " of " + fmtSizeInBytes + ", " + Utils.formatPercent(bytesReadAll, ftpSizeInBytes) + ", " + Utils.formatTime(elapsed) + ", " + Utils.formatBytesPerSec(bytesReadAll, elapsed) + ", " + Utils.formatBytesPerSec(bytesReadAll, readTime) + " read, " + Utils.formatBytesPerSec(bytesReadAll, writeTime) + " write)");
              prev = cur;
            }
          }          
        }
        if (ftp.completePendingCommand()) {
          in.close();
          cur = timer.current();
          out.close();
          readTime += (timer.current() - cur); 
          bytesTransferredAll.addAndGet(bytesReadAll);
          fileCntSuccess.incrementAndGet();
          if (info) {
            long elapsed = timer.stop();
            info(null, "  " + file + " - complete (" + Utils.formatSizeInBytes(bytesReadAll) + ", " + Utils.formatTime(elapsed) + ", " + Utils.formatBytesPerSec(bytesReadAll, elapsed) + ", " + Utils.formatBytesPerSec(bytesReadAll, readTime) + " read, " + Utils.formatBytesPerSec(bytesReadAll, writeTime) + " write)");
          }
        } 
        else {
          in.close();
          out.close();
          if (info) {
            info(null, "  " + file + " - failed");
          }
          exec.signal(Signal.Type.SQLEXCEPTION, "File transfer failed: " + file);
        }
      }
      catch(IOException e) {
        exec.signal(e);
      }
    }
    try {
      if (ftp.isConnected()) {
        ftp.logout();
        ftp.disconnect();
      }
    } 
    catch (IOException e) {      
    }
  }
  
  /**
   * Get the list of files to transfer
   */
  void retrieveFileList(String dir) {
    if (info) {
      if (dir == null || dir.isEmpty()) {
        info(null, "  Listing the current working FTP directory");
      }
      else {
        info(null, "  Listing " + dir);
      }
    }
    try {
      FTPFile[] files = ftp.listFiles(dir);
      ArrayList dirs = new ArrayList();
      for (FTPFile file : files) {
        String name = file.getName();
        if (file.isFile()) {
          if (filePattern == null || Pattern.matches(filePattern, name)) {
            if (dir != null && !dir.isEmpty()) {
              if (dir.endsWith("/")) {
                name = dir + name;
              }
              else {
                name = dir + "/" + name;
              }
            }
            if (!newOnly || !isTargetExists(name)) {
              fileCnt++;
              ftpSizeInBytes += file.getSize();
              filesQueue.add(name);
              filesMap.put(name, file);
            }
          }
        }
        else {
          if (subdir && !name.equals(".") && !name.equals("..")) {
            dirCnt++;
            dirs.add(file);
          }
        }
      }
      if (subdir) {
        for (FTPFile d : dirs) {
          String sd = d.getName();
          if (dir != null && !dir.isEmpty()) {
            if (dir.endsWith("/")) {
              sd = dir + sd;
            }
            else {
              sd = dir + "/" + sd;
            }
          }
          retrieveFileList(sd);
        }
      }
    }
    catch (IOException e) {      
      exec.signal(e);
    }
  }
  
  /**
   * Open and initialize FTP
   */
  FTPClient openConnection(HplsqlParser.Copy_from_ftp_stmtContext ctx) {
	  FTPClient ftp = new FTPClient();
	  Timer timer = new Timer();
	  timer.start();
	  try {
	    ftp.connect(host);
	    ftp.enterLocalPassiveMode();
	    ftp.setFileType(FTP.BINARY_FILE_TYPE);
	    if (!ftp.login(user, pwd)) {    
	      if (ftp.isConnected()) {
	        ftp.disconnect();
	      }
	      exec.signal(Signal.Type.SQLEXCEPTION, "Cannot login to FTP server: " + host);
	      return null;
	    }
	    timer.stop();
	    if (info) {
	      info(ctx, "Connected to ftp: " + host + " (" + timer.format() + ")");
	    }
    }
	  catch (IOException e) {      
	    exec.signal(e);
	  }
	  return ftp;
  }
  
  /**
   * Check if the file already exists in the target file system
   */
  boolean isTargetExists(String name) {
    String target = getTargetFileName(name);
    try {
      if (local) {
        if (new java.io.File(target).exists()) {
          return true;
        }
      }
      else if (new File().exists(target)) {
        return true;
      }
    }
    catch(Exception e) {      
    }
    return false;        
  }
  
  /**
   * Get the target file relative path and name
   */
  String getTargetFileName(String file) {
    String outFile = file;
    // Remove source dir from file
    if (dir != null) {
      if (targetDir != null) {
        outFile = targetDir + file.substring(dir.length()); 
      }
      else {
        outFile = file.substring(dir.length());
      }
    }
    else if (targetDir != null) {
      outFile = targetDir + "/" + file;
    }
    return outFile;
  }
  
  /**
   * Initialize COPY FROM FTP command options
   */
  void initOptions(HplsqlParser.Copy_from_ftp_stmtContext ctx) {
    host = evalPop(ctx.expr()).toString();
    user = "anonymous";
    pwd = "";
    int cnt = ctx.copy_ftp_option().size();
    for (int i = 0; i < cnt; i++) {
      HplsqlParser.Copy_ftp_optionContext option = ctx.copy_ftp_option(i);
      if (option.T_USER() != null) {
        user = evalPop(option.expr()).toString();
      }
      else if (option.T_PWD() != null) {
        pwd = evalPop(option.expr()).toString();
      }
      else if (option.T_DIR() != null) {
        if (option.file_name() != null) {
          dir = option.file_name().getText();
        }
        else {
          dir = evalPop(option.expr()).toString();
        }
      }
      else if (option.T_FILES() != null) {
        filePattern = evalPop(option.expr()).toString();
      }
      else if (option.T_NEW() != null) {
        newOnly = true;
      }
      else if (option.T_SUBDIR() != null) {
        subdir = true;
      }
      else if (option.T_SESSIONS() != null) {
        sessions = evalPop(option.expr()).intValue();
      }
      else if (option.T_TO() != null) {
        if (option.file_name() != null) {
          targetDir = option.file_name().getText();
        }
        else {
          targetDir = evalPop(option.expr()).toString();
        }
        if (option.T_LOCAL() != null) {
          local = true;
        }
      }
    }
  }

  /**
   * Evaluate the expression and pop value from the stack
   */
  Var evalPop(ParserRuleContext ctx) {
    exec.visit(ctx);
    if (!exec.stack.isEmpty()) { 
      return exec.stackPop();
    }
    return Var.Empty;
  }

  /**
   * Trace and information
   */
  public void trace(ParserRuleContext ctx, String message) {
    exec.trace(ctx, message);
  }
  
  public void info(ParserRuleContext ctx, String message) {
    exec.info(ctx, message);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy